├── scripts ├── instance_names_half.txt ├── ips_half.txt ├── instance_names.txt ├── ips.txt ├── distribute_zip.py ├── run_aria_mp_ycsb.sh ├── run_calvin_mp_ycsb.sh ├── distribute_script.py ├── distribute_script_half.py ├── run_2pl_mp_ycsb.sh ├── run_sundial_mp_ycsb.sh ├── aws_network_latency_pb.py ├── aws_model.py ├── aws_network_latency.py ├── aws_measure_k.py ├── aws_epoch.py ├── gc_lotus_mp_ycsb.py ├── aws_breakdown.py ├── aws_calvin.py ├── aws_latency_pb.py ├── aws_perf_comparison.py ├── aws_epoch_scalability.py ├── gc_sundial_mp_ycsb.py ├── gc_aria_mp_ycsb.py ├── gc_hstore_mp_ycsb.py ├── gc_2pl_mp_ycsb.py ├── gc_calvin_mp_ycsb.py ├── aws_replication.py ├── aws_disk_logging.py ├── aws_perf_comparison_pb.py ├── run_lotus_mp_ycsb.sh ├── aws_scalability.py ├── run_lotus_mp_ycsb_sync.sh └── aws_latency.py ├── compile.sh ├── common ├── Time.cpp ├── ClassOf.h ├── DeferCode.h ├── Operation.h ├── Hash.h ├── SpinLock.h ├── FunctionTraits.h ├── Time.h ├── LockfreeQueue.h ├── Serialization.h ├── Zipf.h ├── Random.h ├── Encoder.h ├── BufferedFileWriter.h ├── Percentile.h ├── ThreadPool.h ├── StringPiece.h ├── FixedString.h ├── MessagePiece.h ├── BufferedReader.h ├── Socket.h └── HashMap.h ├── benchmark ├── ycsb │ ├── Storage.h │ ├── Random.h │ ├── Schema.h │ ├── Context.h │ └── Workload.h └── tpcc │ ├── Storage.h │ ├── Context.h │ ├── Random.h │ └── Workload.h ├── core ├── Defs.h ├── Delay.h ├── Worker.h ├── group_commit │ └── Manager.h ├── Context.h ├── ControlMessage.h └── SchemaDef.h ├── CMakeLists.txt ├── protocol ├── Aria │ ├── Aria.h │ ├── AriaRWKey.h │ └── AriaHelper.h ├── Silo │ ├── SiloHelper.h │ ├── SiloExecutor.h │ └── SiloRWKey.h ├── Calvin │ ├── Calvin.h │ ├── CalvinHelper.h │ └── CalvinManager.h ├── Star │ ├── StarQueryNum.h │ └── StarManager.h ├── SiloGC │ └── SiloGCExecutor.h ├── TwoPL │ ├── TwoPLExecutor.h │ ├── TwoPLHelper.h │ └── TwoPLRWKey.h ├── TwoPLGC │ └── TwoPLGCExecutor.h ├── Sundial │ ├── SundialExecutor.h │ └── SundialRWKey.h └── H-Store │ └── 
HStoreHelper.h ├── bench_tpcc.cpp ├── README.MD └── bench_ycsb.cpp /scripts/instance_names_half.txt: -------------------------------------------------------------------------------- 1 | node1 node1 2 | node2 node2 3 | node3 node3 -------------------------------------------------------------------------------- /scripts/ips_half.txt: -------------------------------------------------------------------------------- 1 | 1.2.3.4 1.2.3.4 2 | 1.2.3.5 1.2.3.5 3 | 1.2.3.6 1.2.3.6 -------------------------------------------------------------------------------- /compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | rm -rf CMakeFiles/ CMakeCache.txt 4 | cmake . 5 | make -j 4 6 | -------------------------------------------------------------------------------- /scripts/instance_names.txt: -------------------------------------------------------------------------------- 1 | node1 node1 2 | node2 node2 3 | node3 node3 4 | node4 node4 5 | node5 node5 6 | node6 node6 -------------------------------------------------------------------------------- /scripts/ips.txt: -------------------------------------------------------------------------------- 1 | 1.2.3.4 1.2.3.4 2 | 1.2.3.5 1.2.3.5 3 | 1.2.3.6 1.2.3.6 4 | 1.2.3.7 1.2.3.7 5 | 1.2.3.8 1.2.3.8 6 | 1.2.3.9 1.2.3.9 -------------------------------------------------------------------------------- /common/Time.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 7/22/18. 3 | // 4 | 5 | #include "common/Time.h" 6 | 7 | namespace star { 8 | std::chrono::steady_clock::time_point Time::startTime = std::chrono::steady_clock::now(); 9 | } -------------------------------------------------------------------------------- /common/ClassOf.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 9/5/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace star { 10 | template class ClassOf { 11 | public: 12 | static constexpr std::size_t size() { return sizeof(T); } 13 | }; 14 | } // namespace star -------------------------------------------------------------------------------- /scripts/distribute_zip.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | ips = [line.strip() for line in open("ips.txt", "r")] 5 | n = len(ips) 6 | 7 | ins = [line.split("\t")[0] for line in ips] 8 | outs = [line.split("\t")[1] for line in ips] 9 | 10 | zip = sys.argv[1] 11 | 12 | for ip in outs: 13 | os.system("scp %s ubuntu@%s:~/" %(zip, ip)) 14 | -------------------------------------------------------------------------------- /common/DeferCode.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Xinjing Zhou on 12/11/21. 3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace star { 10 | 11 | class DeferCode { 12 | public: 13 | DeferCode(std::function f_):f(f_){} 14 | ~DeferCode() { f(); } 15 | private: 16 | std::function f; 17 | }; 18 | 19 | } // namespace star -------------------------------------------------------------------------------- /benchmark/ycsb/Storage.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 9/12/18. 3 | // 4 | 5 | #pragma once 6 | 7 | #include "benchmark/ycsb/Schema.h" 8 | 9 | namespace star { 10 | 11 | namespace ycsb { 12 | struct Storage { 13 | ycsb::key ycsb_keys[YCSB_FIELD_SIZE]; 14 | ycsb::value ycsb_values[YCSB_FIELD_SIZE]; 15 | }; 16 | 17 | } // namespace ycsb 18 | } // namespace star -------------------------------------------------------------------------------- /core/Defs.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 9/10/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | namespace star { 8 | 9 | enum class ExecutorStatus { 10 | START, 11 | CLEANUP, 12 | C_PHASE, 13 | S_PHASE, 14 | Analysis, 15 | LockRequest, 16 | LockResponse, 17 | Execute, 18 | Kiva_READ, 19 | Kiva_COMMIT, 20 | Aria_COLLECT_XACT, 21 | Aria_READ, 22 | Aria_COMMIT, 23 | AriaFB_READ, 24 | STOP, 25 | EXIT 26 | }; 27 | 28 | enum class TransactionResult { COMMIT, READY_TO_COMMIT, ABORT, ABORT_NORETRY }; 29 | 30 | } // namespace star 31 | -------------------------------------------------------------------------------- /common/Operation.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 9/17/18. 3 | // 4 | 5 | #pragma once 6 | 7 | #include "common/Encoder.h" 8 | #include 9 | 10 | namespace star { 11 | 12 | class Operation { 13 | 14 | public: 15 | Operation() : tid(0), partition_id(0) {} 16 | 17 | void clear() { 18 | tid = 0; 19 | partition_id = 0; 20 | data.clear(); 21 | } 22 | 23 | void set_tid(uint64_t id) { tid = id; } 24 | 25 | uint64_t get_tid() const { return tid; } 26 | 27 | public: 28 | uint64_t tid; 29 | std::size_t partition_id; 30 | std::string data; 31 | }; 32 | } // namespace star -------------------------------------------------------------------------------- /common/Hash.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 7/13/18. 3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace star { 10 | 11 | template 12 | inline std::size_t hash_combine(const T &v1, const T &v2) { 13 | return v2 ^ (v1 + 0x9e3779b9 + (v2 << 6) + (v2 >> 2)); 14 | } 15 | 16 | template inline std::size_t hash(const T &v) { 17 | return std::hash()(v); 18 | } 19 | 20 | template 21 | inline std::size_t hash(const T &v, Rest... 
rest) { 22 | std::hash h; 23 | return hash_combine(h(v), hash(rest...)); 24 | } 25 | 26 | } // namespace star 27 | -------------------------------------------------------------------------------- /scripts/run_aria_mp_ycsb.sh: -------------------------------------------------------------------------------- 1 | results_dir=~/star/exp_results/gc_aria_mp_ycsb 2 | mkdir -p $results_dir 3 | cd $results_dir 4 | script_name="gc_aria_mp_ycsb_run" 5 | bsize=400 6 | logging_latency=0 7 | nohup gcloud compute ssh --zone us-central1-a node2 --command "cd star; logging_latency=$logging_latency bsize=$bsize ./$script_name.sh" > $script_name-2.log & 8 | nohup gcloud compute ssh --zone us-central1-a node3 --command "cd star; logging_latency=$logging_latency bsize=$bsize ./$script_name.sh" > $script_name-3.log & 9 | 10 | log_path=$results_dir/$script_name-1.log 11 | cd ~/star 12 | logging_latency=$logging_latency bsize=$bsize ./$script_name.sh > $log_path 2>&1 13 | -------------------------------------------------------------------------------- /scripts/run_calvin_mp_ycsb.sh: -------------------------------------------------------------------------------- 1 | results_dir=~/star/exp_results/gc_calvin_mp_ycsb 2 | mkdir -p $results_dir 3 | cd $results_dir 4 | script_name="gc_calvin_mp_ycsb_run" 5 | bsize=400 6 | logging_latency=0 7 | nohup gcloud compute ssh --zone us-central1-a node2 --command "cd star;logging_latency=$logging_latency bsize=$bsize ./$script_name.sh" > $script_name-2.log & 8 | nohup gcloud compute ssh --zone us-central1-a node3 --command "cd star;logging_latency=$logging_latency bsize=$bsize ./$script_name.sh" > $script_name-3.log & 9 | 10 | log_path=$results_dir/$script_name-1.log 11 | cd ~/star 12 | logging_latency=$logging_latency bsize=$bsize ./$script_name.sh > $log_path 2>&1 13 | -------------------------------------------------------------------------------- /scripts/distribute_script.py: 
-------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | ips = [line.strip() for line in open("instance_names.txt", "r")] 5 | n = len(ips) 6 | 7 | ins = [line.split("\t")[0] for line in ips] 8 | outs = [line.split("\t")[1] for line in ips] 9 | 10 | port = int(sys.argv[1]) 11 | script = sys.argv[2] 12 | gc_zone_name = sys.argv[3] 13 | script_base = os.path.basename(script) 14 | script_no_extension = os.path.splitext(script_base)[0] 15 | print(script_no_extension) 16 | for i in range(n): 17 | os.system("python %s %d %d > run.sh" % (script, i, port)) 18 | os.system("chmod u+x run.sh") 19 | os.system("gcloud compute scp --zone %s run.sh %s:~/star/%s_run.sh" % (gc_zone_name, outs[i], script_no_extension)) 20 | #os.system("scp run.sh ubuntu@%s:~/star/run.sh" % outs[i]) 21 | -------------------------------------------------------------------------------- /scripts/distribute_script_half.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | ips = [line.strip() for line in open("instance_names_half.txt", "r")] 5 | n = len(ips) 6 | 7 | ins = [line.split("\t")[0] for line in ips] 8 | outs = [line.split("\t")[1] for line in ips] 9 | 10 | port = int(sys.argv[1]) 11 | script = sys.argv[2] 12 | gc_zone_name = sys.argv[3] 13 | script_base = os.path.basename(script) 14 | script_no_extension = os.path.splitext(script_base)[0] 15 | print(script_no_extension) 16 | 17 | for i in range(n): 18 | os.system("python %s %d %d > run.sh" % (script, i, port)) 19 | os.system("chmod u+x run.sh") 20 | os.system("gcloud compute scp --zone %s run.sh %s:~/star/%s_run.sh" % (gc_zone_name, outs[i], script_no_extension)) 21 | #os.system("scp run.sh ubuntu@%s:~/star/run.sh" % outs[i]) 22 | -------------------------------------------------------------------------------- /common/SpinLock.h: -------------------------------------------------------------------------------- 1 | // 2 | 
// Created by Yi Lu on 7/14/18. 3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | namespace star { 11 | class SpinLock { 12 | public: 13 | // constructors 14 | SpinLock() = default; 15 | 16 | SpinLock(const SpinLock &) = delete; // non construction-copyable 17 | SpinLock &operator=(const SpinLock &) = delete; // non copyable 18 | 19 | // Modifiers 20 | void lock() { 21 | while (lock_.test_and_set(std::memory_order_acquire)) 22 | ; 23 | } 24 | 25 | void unlock() { lock_.clear(std::memory_order_release); } 26 | 27 | // friend declaration 28 | friend std::ostream &operator<<(std::ostream &, const SpinLock &); 29 | 30 | private: 31 | std::atomic_flag lock_ = ATOMIC_FLAG_INIT; 32 | }; 33 | } // namespace star 34 | -------------------------------------------------------------------------------- /scripts/run_2pl_mp_ycsb.sh: -------------------------------------------------------------------------------- 1 | results_dir=~/star/exp_results/gc_2pl_mp_ycsb 2 | mkdir -p $results_dir 3 | cd $results_dir 4 | script_name="gc_2pl_mp_ycsb_run" 5 | # Start the generated run script on node2..node6 in the background; each remote run's stdout is captured in its own log in $results_dir. 6 | nohup gcloud compute ssh --zone us-central1-a node2 --command "cd star; ./$script_name.sh" > $script_name-2.log & 7 | nohup gcloud compute ssh --zone us-central1-a node3 --command "cd star; ./$script_name.sh" > $script_name-3.log & 8 | nohup gcloud compute ssh --zone us-central1-a node4 --command "cd star; ./$script_name.sh" > $script_name-4.log & 9 | nohup gcloud compute ssh --zone us-central1-a node5 --command "cd star; ./$script_name.sh" > $script_name-5.log & 10 | nohup gcloud compute ssh --zone us-central1-a node6 --command "cd star; ./$script_name.sh" > $script_name-6.log & 11 | 12 | # Run the same script locally in the foreground; its stdout and stderr go to the "-1" log next to the remote logs. 13 | log_path=$results_dir/$script_name-1.log 14 | cd ~/star 15 | sh ./$script_name.sh > $log_path 2>&1 16 | -------------------------------------------------------------------------------- /scripts/run_sundial_mp_ycsb.sh: -------------------------------------------------------------------------------- 1 | 
results_dir=~/star/exp_results/gc_sundial_mp_ycsb 2 | mkdir -p $results_dir 3 | cd $results_dir 4 | script_name="gc_sundial_mp_ycsb_run" 5 | nohup gcloud compute ssh --zone us-central1-a node2 --command "cd star; ./$script_name.sh" > $script_name-2.log & 6 | nohup gcloud compute ssh --zone us-central1-a node3 --command "cd star; ./$script_name.sh" > $script_name-3.log & 7 | nohup gcloud compute ssh --zone us-central1-a node4 --command "cd star; ./$script_name.sh" > $script_name-4.log & 8 | nohup gcloud compute ssh --zone us-central1-a node5 --command "cd star; ./$script_name.sh" > $script_name-5.log & 9 | nohup gcloud compute ssh --zone us-central1-a node6 --command "cd star; ./$script_name.sh" > $script_name-6.log & 10 | 11 | 12 | log_path=$results_dir/$script_name-1.log 13 | cd ~/star 14 | sh ./$script_name.sh > $log_path 2>&1 15 | -------------------------------------------------------------------------------- /scripts/aws_network_latency_pb.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | ips = [line.strip() for line in open("ips.txt", "r")][:2] 4 | n = len(ips) 5 | 6 | ins = [line.split("\t")[0] for line in ips] 7 | outs = [line.split("\t")[1] for line in ips] 8 | 9 | id = int(sys.argv[1]) 10 | port = int(sys.argv[2]) 11 | 12 | delays = [100, 200, 500, 1000, 2000, 5000, 10000] 13 | 14 | def get_cmd(n, i): 15 | cmd = "" 16 | for j in range(n): 17 | if j > 0: 18 | cmd += ";" 19 | if id == j: 20 | cmd += ins[j] + ":" + str(port+i) 21 | else: 22 | cmd += outs[j] + ":" + str(port+i) 23 | return cmd 24 | 25 | 26 | for delay in delays: 27 | for i in range(3): 28 | cmd = get_cmd(n, i) 29 | print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=SiloGC --partition_num=%d --threads=12 --partitioner=pb --group_time=100 --delay=%d --query=mixed --neworder_dist=10 --payment_dist=15' % (id, cmd, 48, delay)) -------------------------------------------------------------------------------- 
/scripts/aws_model.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | ips = [line.strip() for line in open("ips.txt", "r")] 4 | 5 | id = int(sys.argv[1]) 6 | port = int(sys.argv[2]) 7 | 8 | 9 | protocols = ["Star"] 10 | ns = [16, 14, 12, 10, 8, 7, 6, 5, 4, 3, 2] 11 | ps = [1, 5, 10, 15] 12 | 13 | for n in ns: 14 | if id >= n: 15 | break 16 | ins = [line.split("\t")[0] for line in ips[0:n]] 17 | outs = [line.split("\t")[1] for line in ips[0:n]] 18 | for p in ps: 19 | for i in range(3): 20 | cmd = "" 21 | for j in range(n): 22 | if j > 0: 23 | cmd += ";" 24 | if id == j: 25 | cmd += ins[j] + ":" + str(port+i) 26 | else: 27 | cmd += outs[j] + ":" + str(port+i) 28 | print('./bench_ycsb --logtostderr=1 --id=%d --servers="%s" --protocol=Star --partition_num=%d --partitioner=hash2 --threads=12 --read_write_ratio=90 --cross_ratio=%d --batch_size=1000 --batch_flush=200' % (id, cmd, 12*n, p)) 29 | -------------------------------------------------------------------------------- /benchmark/ycsb/Random.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 7/14/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | #include "common/Random.h" 11 | 12 | namespace star { 13 | namespace ycsb { 14 | class Random : public star::Random { 15 | public: 16 | using star::Random::Random; 17 | 18 | std::string rand_str(std::size_t length) { 19 | auto &characters_ = characters(); 20 | auto characters_len = characters_.length(); 21 | std::string result; 22 | for (auto i = 0u; i < length; i++) { 23 | int k = uniform_dist(0, characters_len - 1); 24 | result += characters_[k]; 25 | } 26 | return result; 27 | } 28 | 29 | private: 30 | static const std::string &characters() { 31 | static std::string characters_ = 32 | "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; 33 | return characters_; 34 | }; 35 | }; 36 | } // namespace ycsb 37 | } // namespace star 38 | -------------------------------------------------------------------------------- /common/FunctionTraits.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 7/14/18. 3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace star { 10 | template 11 | struct FunctionTraits : public FunctionTraits {}; 12 | 13 | template 14 | struct FunctionTraits { 15 | enum { arity = sizeof...(Args) }; 16 | 17 | typedef ReturnType return_type; 18 | 19 | template struct arg { 20 | typedef typename std::tuple_element>::type type; 21 | }; 22 | }; 23 | 24 | template 25 | using ReturnType = typename FunctionTraits::return_type; 26 | 27 | template 28 | using Argument0 = typename FunctionTraits::template arg<0>::type; 29 | 30 | template 31 | using Argument1 = typename FunctionTraits::template arg<1>::type; 32 | 33 | } // namespace star 34 | -------------------------------------------------------------------------------- /benchmark/tpcc/Storage.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 9/12/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include "benchmark/tpcc/Schema.h" 8 | 9 | namespace star { 10 | namespace tpcc { 11 | struct Storage { 12 | warehouse::key warehouse_key; 13 | warehouse::value warehouse_value; 14 | 15 | district::key district_key; 16 | district::value district_value; 17 | 18 | customer_name_idx::key customer_name_idx_key; 19 | customer_name_idx::value customer_name_idx_value; 20 | 21 | customer::key customer_key; 22 | customer::value customer_value; 23 | 24 | item::key item_keys[15]; 25 | item::value item_values[15]; 26 | 27 | stock::key stock_keys[15]; 28 | stock::value stock_values[15]; 29 | 30 | new_order::key new_order_key; 31 | 32 | order::key order_key; 33 | order::value order_value; 34 | 35 | order_line::key order_line_keys[15]; 36 | order_line::value order_line_values[15]; 37 | 38 | history::key h_key; 39 | history::value h_value; 40 | }; 41 | } // namespace tpcc 42 | } // namespace star -------------------------------------------------------------------------------- /scripts/aws_network_latency.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | ips = [line.strip() for line in open("ips.txt", "r")] 4 | n = len(ips) 5 | 6 | ins = [line.split("\t")[0] for line in ips] 7 | outs = [line.split("\t")[1] for line in ips] 8 | 9 | id = int(sys.argv[1]) 10 | port = int(sys.argv[2]) 11 | 12 | protocols = ["Star", "SiloGC", "TwoPLGC"] 13 | delays = [100, 200, 500, 1000, 2000, 5000, 10000] 14 | 15 | def get_cmd(n, i): 16 | cmd = "" 17 | for j in range(n): 18 | if j > 0: 19 | cmd += ";" 20 | if id == j: 21 | cmd += ins[j] + ":" + str(port+i) 22 | else: 23 | cmd += outs[j] + ":" + str(port+i) 24 | return cmd 25 | 26 | 27 | for protocol in protocols: 28 | for delay in delays: 29 | for i in range(3): 30 | cmd = get_cmd(n, i) 31 | print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=%s --partition_num=%d --threads=12 --partitioner=hash2 --group_time=100 --delay=%d --query=mixed 
--neworder_dist=10 --payment_dist=15' % (id, cmd, protocol, 12*n, delay)) 32 | -------------------------------------------------------------------------------- /scripts/aws_measure_k.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | ips = [line.strip() for line in open("ips.txt", "r")] 4 | n = len(ips) 5 | 6 | ins = [line.split("\t")[0] for line in ips] 7 | outs = [line.split("\t")[1] for line in ips] 8 | 9 | id = int(sys.argv[1]) 10 | port = int(sys.argv[2]) 11 | 12 | protocols = ["Star", "SiloGC", "TwoPLGC"] 13 | 14 | def get_cmd(n, i): 15 | cmd = "" 16 | for j in range(n): 17 | if j > 0: 18 | cmd += ";" 19 | if id == j: 20 | cmd += ins[j] + ":" + str(port+i) 21 | else: 22 | cmd += outs[j] + ":" + str(port+i) 23 | return cmd 24 | 25 | ix = 0 26 | 27 | 28 | for protocol in protocols: 29 | cmd = get_cmd(n, ix) 30 | ix += 1 31 | print('./bench_ycsb --logtostderr=1 --id=%d --servers="%s" --protocol=%s --partition_num=%d --threads=12 --partitioner=hash2 --read_write_ratio=90 --cross_ratio=10 --batch_flush=200' % (id, cmd, protocol, 12*n)) 32 | 33 | 34 | for protocol in protocols: 35 | cmd = get_cmd(n, ix) 36 | ix += 1 37 | print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=%s --partition_num=%d --threads=12 --partitioner=hash2 --query=mixed --neworder_dist=10 --payment_dist=15' % (id, cmd, protocol, 12*n)) -------------------------------------------------------------------------------- /scripts/aws_epoch.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | ips = [line.strip() for line in open("ips.txt", "r")] 4 | n = len(ips) 5 | 6 | ins = [line.split("\t")[0] for line in ips] 7 | outs = [line.split("\t")[1] for line in ips] 8 | 9 | id = int(sys.argv[1]) 10 | port = int(sys.argv[2]) 11 | 12 | epochs = [1, 2, 5, 10, 20, 50, 100, 200] 13 | 14 | def get_cmd(n, i): 15 | cmd = "" 16 | for j in range(n): 17 | if j > 0: 18 | cmd += ";" 19 | if 
id == j: 20 | cmd += ins[j] + ":" + str(port+i) 21 | else: 22 | cmd += outs[j] + ":" + str(port+i) 23 | return cmd 24 | 25 | 26 | #for epoch in epochs: 27 | # for i in range(3): 28 | # cmd = get_cmd(n, i) 29 | # print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=Star --partition_num=%d --threads=12 --group_time=%d --query=mixed --neworder_dist=10 --payment_dist=15' % (id, cmd, 12*n, epoch)) 30 | 31 | for epoch in epochs: 32 | for i in range(3): 33 | cmd = get_cmd(n, i) 34 | print('./bench_ycsb --logtostderr=1 --id=%d --servers="%s" --protocol=Star --partition_num=%d --threads=12 --group_time=%d --read_write_ratio=90 --cross_ratio=10 --batch_flush=200' % (id, cmd, 12*n, epoch)) 35 | -------------------------------------------------------------------------------- /common/Time.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 7/22/18. 3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | namespace star { 10 | 11 | class Time { 12 | public: 13 | static uint64_t now() { 14 | auto now = std::chrono::steady_clock::now(); 15 | return std::chrono::duration_cast(now - startTime) 16 | .count(); 17 | } 18 | 19 | static std::chrono::steady_clock::time_point startTime; 20 | }; 21 | 22 | class ScopedTimer { 23 | public: 24 | ScopedTimer(std::function f) : call_on_destructor(f) { 25 | startTime = std::chrono::steady_clock::now(); 26 | } 27 | 28 | void reset() { 29 | startTime = std::chrono::steady_clock::now(); 30 | ended = false; 31 | } 32 | void end() { 33 | auto us = std::chrono::duration_cast(std::chrono::steady_clock::now() - startTime) 34 | .count(); 35 | call_on_destructor(us); 36 | ended = true; 37 | } 38 | 39 | ~ScopedTimer() { 40 | if (!ended) { 41 | end(); 42 | } 43 | } 44 | bool ended = false; 45 | std::chrono::steady_clock::time_point startTime; 46 | std::function call_on_destructor; 47 | }; 48 | 49 | } // namespace star 50 | 
-------------------------------------------------------------------------------- /core/Delay.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 9/28/18. 3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace star { 10 | 11 | // delay in us from the sender side 12 | 13 | class Delay { 14 | 15 | public: 16 | Delay(std::size_t coordinator_id, std::size_t coordinator_num) { 17 | DCHECK(coordinator_id < coordinator_num); 18 | this->coordinator_id = coordinator_id; 19 | this->coordinator_num = coordinator_num; 20 | } 21 | 22 | virtual ~Delay() = default; 23 | 24 | virtual int64_t message_delay() const = 0; 25 | 26 | virtual bool delay_enabled() const = 0; 27 | 28 | protected: 29 | std::size_t coordinator_id; 30 | std::size_t coordinator_num; 31 | }; 32 | 33 | class SameDelay : public Delay { 34 | 35 | public: 36 | SameDelay(std::size_t coordinator_id, std::size_t coordinator_num, 37 | int64_t delay_time) 38 | : Delay(coordinator_id, coordinator_num), delay_time(delay_time) { 39 | DCHECK(delay_time >= 0); 40 | } 41 | 42 | virtual ~SameDelay() = default; 43 | 44 | int64_t message_delay() const override { return delay_time; } 45 | 46 | bool delay_enabled() const override { return delay_time != 0; } 47 | 48 | protected: 49 | int64_t delay_time; 50 | }; 51 | 52 | } // namespace star -------------------------------------------------------------------------------- /scripts/gc_lotus_mp_ycsb.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | ips = [line.strip() for line in open("ips.txt", "r")] 4 | n = len(ips) 5 | 6 | ins = [line.split("\t")[0] for line in ips] 7 | outs = [line.split("\t")[1] for line in ips] 8 | 9 | id = int(sys.argv[1]) 10 | port = int(sys.argv[2]) 11 | 12 | ratios = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] 13 | #ratios = [40, 60, 70] 14 | #ratios = [0, 5, 10, 25, 50, 75, 100] 15 | def get_cmd(n, i): 16 | cmd = "" 17 | for j 
in range(n): 18 | if j > 0: 19 | cmd += ";" 20 | if id == j: 21 | cmd += ins[j] + ":" + str(port+i) 22 | else: 23 | cmd += outs[j] + ":" + str(port+i) 24 | return cmd 25 | 26 | for i,ratio in enumerate(ratios): 27 | cmd = get_cmd(n, i) 28 | print('./bench_ycsb --logtostderr=1 --id=%d --servers="%s" --threads=6 --read_write_ratio=50 --partition_num=$partition_num --keys=100000 --granule_count=$granule_count --log_path=/mnt/disks/nvme/coord0 --persist_latency=$logging_latency --wal_group_commit_time=1000 --wal_group_commit_size=0 --partitioner=hpb --hstore_command_logging=true --protocol=HStore --replica_group=3 --lock_manager=1 --batch_size=$bsize --batch_flush=1 --cross_ratio=%d --lotus_async_repl=$lotus_async_repl --cross_part_num=$cross_part_num' % (id, cmd, ratio) ) 29 | print('sleep 10s') 30 | -------------------------------------------------------------------------------- /benchmark/tpcc/Context.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 7/19/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include "core/Context.h" 8 | 9 | namespace star { 10 | namespace tpcc { 11 | 12 | enum class TPCCWorkloadType { NEW_ORDER_ONLY, PAYMENT_ONLY, MIXED }; 13 | 14 | class Context : public star::Context { 15 | public: 16 | TPCCWorkloadType workloadType = TPCCWorkloadType::NEW_ORDER_ONLY; 17 | 18 | Context get_single_partition_context() const { 19 | Context c = *this; 20 | c.newOrderCrossPartitionProbability = 0; 21 | c.paymentCrossPartitionProbability = 0; 22 | c.operation_replication = this->operation_replication; 23 | c.star_sync_in_single_master_phase = false; 24 | return c; 25 | } 26 | 27 | Context get_cross_partition_context() const { 28 | Context c = *this; 29 | c.newOrderCrossPartitionProbability = 100; 30 | c.paymentCrossPartitionProbability = 100; 31 | c.operation_replication = false; 32 | c.star_sync_in_single_master_phase = this->star_sync_in_single_master_phase; 33 | return c; 34 | } 35 | 36 | std::size_t getGranule(std::size_t key) { return key;} 37 | 38 | int newOrderCrossPartitionProbability = 10; // out of 100 39 | int paymentCrossPartitionProbability = 15; // out of 100 40 | }; 41 | } // namespace tpcc 42 | } // namespace star 43 | -------------------------------------------------------------------------------- /common/LockfreeQueue.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 8/29/18. 3 | // 4 | 5 | #pragma once 6 | 7 | #include "glog/logging.h" 8 | #include 9 | 10 | namespace star { 11 | 12 | /* 13 | * boost::lockfree::spsc_queue does not support move only objects, e.g., 14 | * std::unique_ptr. As a result, only Message* can be pushed into 15 | * MessageQueue. 
Usage: std::unique_ptr ptr; MessageQueue q; 16 | * q.push(ptr.release()); 17 | * 18 | * std::unique_ptr ptr1(q.front()); 19 | * q.pop(); 20 | * 21 | */ 22 | 23 | template 24 | class LockfreeQueue 25 | : public boost::lockfree::spsc_queue> { 26 | public: 27 | using element_type = T; 28 | using base_type = 29 | boost::lockfree::spsc_queue>; 30 | 31 | void push(const T &value) { 32 | while (base_type::write_available() == 0) { 33 | nop_pause(); 34 | } 35 | bool ok = base_type::push(value); 36 | CHECK(ok); 37 | } 38 | 39 | void wait_till_non_empty() { 40 | while (base_type::empty()) { 41 | nop_pause(); 42 | } 43 | } 44 | 45 | auto capacity() { return N; } 46 | 47 | private: 48 | void nop_pause() { __asm volatile("pause" : :); } 49 | }; 50 | } // namespace star 51 | -------------------------------------------------------------------------------- /scripts/aws_breakdown.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | ips = [line.strip() for line in open("ips.txt", "r")] 4 | n = len(ips) 5 | 6 | ins = [line.split("\t")[0] for line in ips] 7 | outs = [line.split("\t")[1] for line in ips] 8 | 9 | id = int(sys.argv[1]) 10 | port = int(sys.argv[2]) 11 | 12 | protocols = ["SiloGC", "TwoPLGC"] 13 | ratios = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] 14 | 15 | def get_cmd(n, i): 16 | cmd = "" 17 | for j in range(n): 18 | if j > 0: 19 | cmd += ";" 20 | if id == j: 21 | cmd += ins[j] + ":" + str(port+i) 22 | else: 23 | cmd += outs[j] + ":" + str(port+i) 24 | return cmd 25 | 26 | for protocol in protocols: 27 | for i in range(len(ratios)): 28 | ratio = ratios[i] 29 | cmd = get_cmd(n, i) 30 | print('./bench_ycsb --logtostderr=1 --id=%d --servers="%s" --protocol=%s --partition_num=%d --threads=12 --partitioner=hash2 --read_write_ratio=90 --cross_ratio=%d' % (id, cmd, protocol, 12*n, ratio)) 31 | 32 | for protocol in protocols: 33 | for i in range(len(ratios)): 34 | ratio = ratios[i] 35 | cmd = get_cmd(n, i) 36 | 
print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=%s --partition_num=%d --threads=12 --partitioner=hash2 --query=mixed --neworder_dist=%d --payment_dist=%d' % (id, cmd, protocol, 12*n, ratio, ratio)) 37 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.2) 2 | project(star) 3 | 4 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-long-long -Wno-unused-variable -fno-omit-frame-pointer -Wno-variadic-macros -pedantic -O3 -DNDEBUG -g3") 5 | set(CMAKE_CXX_STANDARD 14) 6 | # -O3 -DNDEBUG 7 | find_library(jemalloc_lib jemalloc) # jemalloc 5.0 8 | 9 | # additional target to perform clang-format run, requires clang-format 10 | 11 | # get all project files 12 | file(GLOB_RECURSE ALL_SOURCE_FILES benchmark/*.h common/*.h core/*.h protocol/*.h bench*.cpp) 13 | 14 | add_custom_target( 15 | format 16 | COMMAND clang-format 17 | -style=LLVM 18 | -i 19 | -sort-includes 20 | ${ALL_SOURCE_FILES} 21 | ) 22 | 23 | include_directories(${CMAKE_SOURCE_DIR}) 24 | 25 | file(GLOB_RECURSE COMMON_SOURCE_FILES common/*.cpp) 26 | add_library(common STATIC ${COMMON_SOURCE_FILES}) 27 | 28 | if(APPLE) 29 | find_package(glog REQUIRED) 30 | find_package(gflags REQUIRED) 31 | target_link_libraries(common ${jemalloc_lib} glog::glog gflags) 32 | else() 33 | target_link_libraries(common ${jemalloc_lib} glog gflags) 34 | endif() 35 | 36 | add_executable(bench_tpcc bench_tpcc.cpp) 37 | target_link_libraries(bench_tpcc common) 38 | 39 | add_executable(bench_ycsb bench_ycsb.cpp) 40 | target_link_libraries(bench_ycsb common) -------------------------------------------------------------------------------- /scripts/aws_calvin.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | ips = [line.strip() for line in open("ips.txt", "r")] 4 | n = len(ips) 5 | 6 | ins = 
[line.split("\t")[0] for line in ips] 7 | outs = [line.split("\t")[1] for line in ips] 8 | 9 | id = int(sys.argv[1]) 10 | port = int(sys.argv[2]) 11 | 12 | ratios = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] 13 | locks = [2, 4, 6] 14 | 15 | def get_cmd(n, i): 16 | cmd = "" 17 | for j in range(n): 18 | if j > 0: 19 | cmd += ";" 20 | if id == j: 21 | cmd += ins[j] + ":" + str(port+i) 22 | else: 23 | cmd += outs[j] + ":" + str(port+i) 24 | return cmd 25 | 26 | for lock in locks: 27 | for ratio in ratios: 28 | for i in range(3): 29 | cmd = get_cmd(n, i) 30 | print('./bench_ycsb --logtostderr=1 --id=%d --servers="%s" --protocol=Calvin --partition_num=%d --threads=12 --batch_size=10000 --replica_group=4 --lock_manager=%d --read_write_ratio=90 --cross_ratio=%d' % (id, cmd, 12*n, lock, ratio)) 31 | 32 | 33 | for lock in locks: 34 | for ratio in ratios: 35 | for i in range(3): 36 | cmd = get_cmd(n, i) 37 | print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=Calvin --partition_num=%d --threads=12 --batch_size=10000 --replica_group=4 --lock_manager=%d --query=mixed --neworder_dist=%d --payment_dist=%d' % (id, cmd, 12*n, lock, ratio, ratio)) 38 | -------------------------------------------------------------------------------- /common/Serialization.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 7/17/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | #include "StringPiece.h" 11 | 12 | namespace star { 13 | template class Serializer { 14 | public: 15 | std::string operator()(const T &v) { 16 | std::string result(sizeof(T), 0); 17 | memcpy(&result[0], &v, sizeof(T)); 18 | return result; 19 | } 20 | }; 21 | 22 | template class Deserializer { 23 | public: 24 | std::size_t operator()(StringPiece str, T &result) const { 25 | std::memcpy(&result, str.data(), sizeof(T)); 26 | return sizeof(T); 27 | } 28 | }; 29 | 30 | template <> class Serializer { 31 | public: 32 | std::string operator()(const std::string &v) { 33 | return Serializer()(v.size()) + v; 34 | } 35 | }; 36 | 37 | template <> class Deserializer { 38 | public: 39 | std::size_t operator()(StringPiece str, std::string &result) const { 40 | std::string::size_type string_length; 41 | std::size_t size = 42 | Deserializer()(str, string_length); 43 | size += string_length; 44 | str.remove_prefix(sizeof(string_length)); 45 | result = std::string(str.begin(), str.begin() + string_length); 46 | return size; 47 | } 48 | }; 49 | 50 | } // namespace star 51 | -------------------------------------------------------------------------------- /scripts/aws_latency_pb.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | ips = [line.strip() for line in open("ips.txt", "r")][:2] 4 | n = len(ips) 5 | 6 | ins = [line.split("\t")[0] for line in ips] 7 | outs = [line.split("\t")[1] for line in ips] 8 | 9 | id = int(sys.argv[1]) 10 | port = int(sys.argv[2]) 11 | 12 | ratios = [10, 50, 90] 13 | 14 | def get_cmd(n, i): 15 | cmd = "" 16 | for j in range(n): 17 | if j > 0: 18 | cmd += ";" 19 | if id == j: 20 | cmd += ins[j] + ":" + str(port+i) 21 | else: 22 | cmd += outs[j] + ":" + str(port+i) 23 | return cmd 24 | 25 | 26 | for i in range(len(ratios)): 27 | ratio = ratios[i] 28 | cmd = get_cmd(n, i) 29 | print('./bench_ycsb --logtostderr=1 --id=%d --servers="%s" 
--protocol=Silo --partition_num=%d --threads=12 --partitioner=pb --read_write_ratio=90 --cross_ratio=%d' % (id, cmd, 48, ratio)) 30 | 31 | for i in range(len(ratios)): 32 | ratio = ratios[i] 33 | cmd = get_cmd(n, i) 34 | print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=Silo --partition_num=%d --threads=12 --partitioner=pb --query=mixed --neworder_dist=%d --payment_dist=%d' % (id, cmd, 48, ratio, ratio)) 35 | 36 | 37 | cmd = get_cmd(n, 0) 38 | print('./bench_ycsb --logtostderr=1 --id=%d --servers="%s" --protocol=Silo --partition_num=%d --threads=12 --partitioner=pb --read_write_ratio=90 --cross_ratio=10' % (id, cmd, 48)) -------------------------------------------------------------------------------- /scripts/aws_perf_comparison.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | ips = [line.strip() for line in open("ips.txt", "r")] 4 | n = len(ips) 5 | 6 | ins = [line.split("\t")[0] for line in ips] 7 | outs = [line.split("\t")[1] for line in ips] 8 | 9 | id = int(sys.argv[1]) 10 | port = int(sys.argv[2]) 11 | 12 | #protocols = ["Star", "SiloGC", "TwoPLGC"] 13 | protocols = ["Star"] 14 | ratios = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] 15 | 16 | def get_cmd(n, i): 17 | cmd = "" 18 | for j in range(n): 19 | if j > 0: 20 | cmd += ";" 21 | if id == j: 22 | cmd += ins[j] + ":" + str(port+i) 23 | else: 24 | cmd += outs[j] + ":" + str(port+i) 25 | return cmd 26 | 27 | for protocol in protocols: 28 | for ratio in ratios: 29 | for i in range(3): 30 | cmd = get_cmd(n, i) 31 | print('./bench_ycsb --logtostderr=1 --id=%d --servers="%s" --protocol=%s --partition_num=%d --threads=12 --partitioner=hash2 --read_write_ratio=90 --cross_ratio=%d --batch_flush=200' % (id, cmd, protocol, 12*n, ratio)) 32 | 33 | #for protocol in protocols: 34 | # for ratio in ratios: 35 | # for i in range(3): 36 | # cmd = get_cmd(n, i) 37 | # print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=%s 
--partition_num=%d --threads=12 --partitioner=hash2 --query=mixed --neworder_dist=%d --payment_dist=%d' % (id, cmd, protocol, 12*n, ratio, ratio)) 38 | 39 | 40 | -------------------------------------------------------------------------------- /common/Zipf.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 7/19/18. 3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | namespace star { 11 | 12 | class Zipf { 13 | public: 14 | void init(int n, double theta) { 15 | hasInit = true; 16 | 17 | n_ = n; 18 | theta_ = theta; 19 | alpha_ = 1.0 / (1.0 - theta_); 20 | zetan_ = zeta(n_); 21 | eta_ = (1.0 - std::pow(2.0 / n_, 1.0 - theta_)) / (1.0 - zeta(2) / zetan_); 22 | } 23 | 24 | int value(double u) { 25 | CHECK(hasInit); 26 | 27 | double uz = u * zetan_; 28 | int v; 29 | if (uz < 1) { 30 | v = 0; 31 | } else if (uz < 1 + std::pow(0.5, theta_)) { 32 | v = 1; 33 | } else { 34 | v = static_cast(n_ * std::pow(eta_ * u - eta_ + 1, alpha_)); 35 | } 36 | DCHECK(v >= 0 && v < n_); 37 | return v; 38 | } 39 | 40 | static Zipf &globalZipf() { 41 | static Zipf z; 42 | return z; 43 | } 44 | 45 | static Zipf &globalZipfForStraggler() { 46 | static Zipf z; 47 | return z; 48 | } 49 | 50 | private: 51 | double zeta(int n) { 52 | DCHECK(hasInit); 53 | 54 | double sum = 0; 55 | 56 | for (auto i = 1; i <= n; i++) { 57 | sum += std::pow(1.0 / i, theta_); 58 | } 59 | 60 | return sum; 61 | } 62 | 63 | bool hasInit = false; 64 | 65 | int n_; 66 | double theta_; 67 | double alpha_; 68 | double zetan_; 69 | double eta_; 70 | }; 71 | } // namespace star 72 | -------------------------------------------------------------------------------- /scripts/aws_epoch_scalability.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | ips = [line.strip() for line in open("ips.txt", "r")] 4 | 5 | id = int(sys.argv[1]) 6 | port = int(sys.argv[2]) 7 | 8 | #ns = [16, 14, 12, 10, 8, 6, 
4, 2] 9 | #ns = [16, 14, 12, 10] 10 | ns = [8, 6, 4, 2] 11 | group_times = [10, 20, 50, 100, 200] 12 | 13 | 14 | for n in ns: 15 | if id >= n: 16 | break 17 | ins = [line.split("\t")[0] for line in ips[0:n]] 18 | outs = [line.split("\t")[1] for line in ips[0:n]] 19 | for t in group_times: 20 | for i in range(3): 21 | cmd = "" 22 | for j in range(n): 23 | if j > 0: 24 | cmd += ";" 25 | if id == j: 26 | cmd += ins[j] + ":" + str(port+i) 27 | else: 28 | cmd += outs[j] + ":" + str(port+i) 29 | 30 | print('./bench_ycsb --logtostderr=1 --id=%d --servers="%s" --protocol=Star --partition_num=%d --threads=12 --cross_ratio=10 --read_write_ratio=90 --group_time=%d --batch_size=1000 --batch_flush=200' % (id, cmd, 12*n, t)) 31 | 32 | for i in range(3): 33 | cmd = "" 34 | for j in range(n): 35 | if j > 0: 36 | cmd += ";" 37 | if id == j: 38 | cmd += ins[j] + ":" + str(port+i) 39 | else: 40 | cmd += outs[j] + ":" + str(port+i) 41 | 42 | print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=Star --partition_num=%d --threads=12 --query=mixed --neworder_dist=10 --payment_dist=15 --group_time=%d --batch_size=1000' % (id, cmd, 12*n, t)) 43 | -------------------------------------------------------------------------------- /scripts/gc_sundial_mp_ycsb.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | ips = [line.strip() for line in open("ips.txt", "r")] 4 | n = len(ips) 5 | 6 | ins = [line.split("\t")[0] for line in ips] 7 | outs = [line.split("\t")[1] for line in ips] 8 | 9 | id = int(sys.argv[1]) 10 | port = int(sys.argv[2]) 11 | 12 | ratios = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] 13 | def get_cmd(n, i): 14 | cmd = "" 15 | for j in range(n): 16 | if j > 0: 17 | cmd += ";" 18 | if id == j: 19 | cmd += ins[j] + ":" + str(port+i) 20 | else: 21 | cmd += outs[j] + ":" + str(port+i) 22 | return cmd 23 | 24 | for i,ratio in enumerate(ratios): 25 | cmd = get_cmd(n, i) 26 | print('./bench_ycsb --logtostderr=1 
--id=%d --servers="%s" --threads=6 --read_write_ratio=50 --partition_num=18 --keys=100000 --granule_count=1 --log_path=/mnt/disks/nvme/coord0 --persist_latency=0 --wal_group_commit_time=1000 --wal_group_commit_size=3 --partitioner=hash2 --hstore_command_logging=false --protocol=Sundial --replica_group=3 --lock_manager=1 --batch_flush=1 --cross_ratio=%d --lotus_async_repl=false --cross_part_num=5' % (id, cmd, ratio) ) 27 | 28 | # for lock in locks: 29 | # for ratio in ratios: 30 | # for i in range(3): 31 | # cmd = get_cmd(n, i) 32 | # print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=Calvin --partition_num=%d --threads=12 --batch_size=10000 --replica_group=4 --lock_manager=%d --query=mixed --neworder_dist=%d --payment_dist=%d' % (id, cmd, 12*n, lock, ratio, ratio)) 33 | -------------------------------------------------------------------------------- /scripts/gc_aria_mp_ycsb.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | ips = [line.strip() for line in open("ips_half.txt", "r")] 4 | n = len(ips) 5 | 6 | ins = [line.split("\t")[0] for line in ips] 7 | outs = [line.split("\t")[1] for line in ips] 8 | 9 | id = int(sys.argv[1]) 10 | port = int(sys.argv[2]) 11 | 12 | ratios = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] 13 | def get_cmd(n, i): 14 | cmd = "" 15 | for j in range(n): 16 | if j > 0: 17 | cmd += ";" 18 | if id == j: 19 | cmd += ins[j] + ":" + str(port+i) 20 | else: 21 | cmd += outs[j] + ":" + str(port+i) 22 | return cmd 23 | 24 | for i,ratio in enumerate(ratios): 25 | cmd = get_cmd(n, i) 26 | print('./bench_ycsb --logtostderr=1 --id=%d --servers="%s" --threads=6 --read_write_ratio=50 --partition_num=180 --keys=100000 --granule_count=1 --log_path=/mnt/disks/nvme/coord0 --persist_latency=$logging_latency --wal_group_commit_time=1000 --wal_group_commit_size=0 --partitioner=hash --hstore_command_logging=true --protocol=Aria --replica_group=3 --lock_manager=1 --batch_size=$bsize 
--batch_flush=1 --cross_ratio=%d --lotus_async_repl=false' % (id, cmd, ratio) ) 27 | 28 | # for lock in locks: 29 | # for ratio in ratios: 30 | # for i in range(3): 31 | # cmd = get_cmd(n, i) 32 | # print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=Calvin --partition_num=%d --threads=12 --batch_size=10000 --replica_group=4 --lock_manager=%d --query=mixed --neworder_dist=%d --payment_dist=%d' % (id, cmd, 12*n, lock, ratio, ratio)) 33 | -------------------------------------------------------------------------------- /scripts/gc_hstore_mp_ycsb.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | ips = [line.strip() for line in open("ips.txt", "r")] 4 | n = len(ips) 5 | 6 | ins = [line.split("\t")[0] for line in ips] 7 | outs = [line.split("\t")[1] for line in ips] 8 | 9 | id = int(sys.argv[1]) 10 | port = int(sys.argv[2]) 11 | 12 | ratios = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] 13 | def get_cmd(n, i): 14 | cmd = "" 15 | for j in range(n): 16 | if j > 0: 17 | cmd += ";" 18 | if id == j: 19 | cmd += ins[j] + ":" + str(port+i) 20 | else: 21 | cmd += outs[j] + ":" + str(port+i) 22 | return cmd 23 | 24 | for i,ratio in enumerate(ratios): 25 | cmd = get_cmd(n, i) 26 | print('./bench_ycsb --logtostderr=1 --id=%d --servers="%s" --threads=6 --read_write_ratio=50 --partition_num=18 --keys=100000 --granule_count=1 --log_path=/mnt/disks/nvme/coord0 --persist_latency=0 --wal_group_commit_time=1000 --wal_group_commit_size=3 --partitioner=hpb --hstore_command_logging=true --protocol=HStore --replica_group=3 --lock_manager=1 --batch_size=$bsize --batch_flush=1 --cross_ratio=%d --lotus_async_repl=false --cross_part_num=5' % (id, cmd, ratio) ) 27 | 28 | # for lock in locks: 29 | # for ratio in ratios: 30 | # for i in range(3): 31 | # cmd = get_cmd(n, i) 32 | # print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=Calvin --partition_num=%d --threads=12 --batch_size=10000 --replica_group=4 
--lock_manager=%d --query=mixed --neworder_dist=%d --payment_dist=%d' % (id, cmd, 12*n, lock, ratio, ratio)) 33 | -------------------------------------------------------------------------------- /scripts/gc_2pl_mp_ycsb.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | ips = [line.strip() for line in open("ips.txt", "r")] 4 | n = len(ips) 5 | 6 | ins = [line.split("\t")[0] for line in ips] 7 | outs = [line.split("\t")[1] for line in ips] 8 | 9 | id = int(sys.argv[1]) 10 | port = int(sys.argv[2]) 11 | 12 | ratios = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] 13 | #ratios = [30,50] 14 | def get_cmd(n, i): 15 | cmd = "" 16 | for j in range(n): 17 | if j > 0: 18 | cmd += ";" 19 | if id == j: 20 | cmd += ins[j] + ":" + str(port+i) 21 | else: 22 | cmd += outs[j] + ":" + str(port+i) 23 | return cmd 24 | 25 | for i,ratio in enumerate(ratios): 26 | cmd = get_cmd(n, i) 27 | print('./bench_ycsb --logtostderr=1 --id=%d --servers="%s" --threads=6 --read_write_ratio=50 --partition_num=18 --keys=100000 --granule_count=1 --log_path=/mnt/disks/nvme/coord0 --persist_latency=0 --wal_group_commit_time=1000 --wal_group_commit_size=3 --partitioner=hash2 --hstore_command_logging=false --protocol=TwoPL --replica_group=3 --lock_manager=1 --batch_flush=1 --cross_ratio=%d --lotus_async_repl=false --cross_part_num=5' % (id, cmd, ratio) ) 28 | 29 | # for lock in locks: 30 | # for ratio in ratios: 31 | # for i in range(3): 32 | # cmd = get_cmd(n, i) 33 | # print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=Calvin --partition_num=%d --threads=12 --batch_size=10000 --replica_group=4 --lock_manager=%d --query=mixed --neworder_dist=%d --payment_dist=%d' % (id, cmd, 12*n, lock, ratio, ratio)) 34 | -------------------------------------------------------------------------------- /scripts/gc_calvin_mp_ycsb.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | ips = 
[line.strip() for line in open("ips_half.txt", "r")] 4 | n = len(ips) 5 | 6 | ins = [line.split("\t")[0] for line in ips] 7 | outs = [line.split("\t")[1] for line in ips] 8 | 9 | id = int(sys.argv[1]) 10 | port = int(sys.argv[2]) 11 | 12 | ratios = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] 13 | batch_size=400 14 | def get_cmd(n, i): 15 | cmd = "" 16 | for j in range(n): 17 | if j > 0: 18 | cmd += ";" 19 | if id == j: 20 | cmd += ins[j] + ":" + str(port+i) 21 | else: 22 | cmd += outs[j] + ":" + str(port+i) 23 | return cmd 24 | 25 | for i,ratio in enumerate(ratios): 26 | cmd = get_cmd(n, i) 27 | print('./bench_ycsb --logtostderr=1 --id=%d --servers="%s" --threads=6 --read_write_ratio=50 --partition_num=180 --keys=100000 --granule_count=1 --log_path=/mnt/disks/nvme/coord0 --persist_latency=$logging_latency --wal_group_commit_time=1000 --wal_group_commit_size=0 --partitioner=hash --hstore_command_logging=true --protocol=Calvin --replica_group=3 --lock_manager=1 --batch_size=$bsize --batch_flush=1 --cross_ratio=%d --lotus_async_repl=false' % (id, cmd, ratio) ) 28 | 29 | # for lock in locks: 30 | # for ratio in ratios: 31 | # for i in range(3): 32 | # cmd = get_cmd(n, i) 33 | # print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=Calvin --partition_num=%d --threads=12 --batch_size=10000 --replica_group=4 --lock_manager=%d --query=mixed --neworder_dist=%d --payment_dist=%d' % (id, cmd, 12*n, lock, ratio, ratio)) 34 | -------------------------------------------------------------------------------- /scripts/aws_replication.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | ips = [line.strip() for line in open("ips.txt", "r")] 4 | n = len(ips) 5 | 6 | ins = [line.split("\t")[0] for line in ips] 7 | outs = [line.split("\t")[1] for line in ips] 8 | 9 | id = int(sys.argv[1]) 10 | port = int(sys.argv[2]) 11 | 12 | ratios = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] 13 | 14 | def get_cmd(n, i): 15 | 
cmd = "" 16 | for j in range(n): 17 | if j > 0: 18 | cmd += ";" 19 | if id == j: 20 | cmd += ins[j] + ":" + str(port+i) 21 | else: 22 | cmd += outs[j] + ":" + str(port+i) 23 | return cmd 24 | 25 | 26 | for ratio in ratios: 27 | for i in range(3): 28 | cmd = get_cmd(n, i) 29 | print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=Star --partition_num=%d --threads=12 --partitioner=hash2 --query=mixed --neworder_dist=%d --payment_dist=%d --star_sync=true' % (id, cmd, 12*n, ratio, ratio)) 30 | 31 | for ratio in ratios: 32 | for i in range(3): 33 | cmd = get_cmd(n, i) 34 | print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=Star --partition_num=%d --threads=12 --partitioner=hash2 --query=mixed --neworder_dist=%d --payment_dist=%d' % (id, cmd, 12*n, ratio, ratio)) 35 | 36 | for ratio in ratios: 37 | for i in range(3): 38 | cmd = get_cmd(n, i) 39 | print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=Star --partition_num=%d --threads=12 --partitioner=hash2 --query=mixed --neworder_dist=%d --payment_dist=%d --operation_replication=true' % (id, cmd, 12*n, ratio, ratio)) -------------------------------------------------------------------------------- /benchmark/tpcc/Random.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 7/14/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | #include "common/Random.h" 11 | 12 | namespace star { 13 | namespace tpcc { 14 | class Random : public star::Random { 15 | public: 16 | using star::Random::Random; 17 | 18 | uint64_t non_uniform_distribution(uint64_t A, uint64_t x, uint64_t y) { 19 | return (uniform_dist(0, A) | uniform_dist(x, y)) % (y - x + 1) + x; 20 | } 21 | 22 | std::string n_string(std::size_t min_len, std::size_t max_len) { 23 | auto len = uniform_dist(min_len, max_len); 24 | return rand_str(len, numeric()); 25 | } 26 | 27 | std::string rand_zip() { 28 | auto zip = n_string(4, 4); 29 | // append "11111" 30 | for (int i = 0; i < 5; i++) { 31 | zip += '1'; 32 | } 33 | return zip; 34 | } 35 | 36 | std::string rand_last_name(int n) { 37 | const auto &last_names = customer_last_names(); 38 | const auto &s1 = last_names[n / 100]; 39 | const auto &s2 = last_names[n / 10 % 10]; 40 | const auto &s3 = last_names[n % 10]; 41 | return s1 + s2 + s3; 42 | } 43 | 44 | private: 45 | static const std::vector &customer_last_names() { 46 | static std::vector last_names = { 47 | "BAR", "OUGHT", "ABLE", "PRI", "PRES", 48 | "ESE", "ANTI", "CALLY", "ATION", "EING"}; 49 | return last_names; 50 | } 51 | 52 | static const std::string &numeric() { 53 | static std::string numeric_ = "0123456789"; 54 | return numeric_; 55 | }; 56 | }; 57 | } // namespace tpcc 58 | } // namespace star 59 | -------------------------------------------------------------------------------- /scripts/aws_disk_logging.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | ips = [line.strip() for line in open("ips.txt", "r")] 4 | n = len(ips) 5 | 6 | ins = [line.split("\t")[0] for line in ips] 7 | outs = [line.split("\t")[1] for line in ips] 8 | 9 | id = int(sys.argv[1]) 10 | port = int(sys.argv[2]) 11 | 12 | def get_cmd(n, i): 13 | cmd = "" 14 | for j in range(n): 15 | if j > 0: 16 | cmd += ";" 17 | if id == j: 18 | cmd += 
ins[j] + ":" + str(port+i) 19 | else: 20 | cmd += outs[j] + ":" + str(port+i) 21 | return cmd 22 | 23 | 24 | #for i in range(3): 25 | # cmd = get_cmd(n, i) 26 | # print('./bench_ycsb --logtostderr=1 --id=%d --servers="%s" --protocol=Star --partition_num=%d --threads=12 --read_write_ratio=90 --cross_ratio=10 --batch_flush=200' % (id, cmd, 12*n)) 27 | 28 | 29 | #for i in range(3): 30 | # cmd = get_cmd(n, i) 31 | # print('./bench_ycsb --logtostderr=1 --id=%d --servers="%s" --protocol=Star --partition_num=%d --threads=12 --read_write_ratio=90 --cross_ratio=10 --batch_flush=200 --log_path=/home/ubuntu/star.logging' % (id, cmd, 12*n)) 32 | 33 | 34 | #for i in range(3): 35 | # cmd = get_cmd(n, i) 36 | # print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=Star --partition_num=%d --threads=12 --query=mixed --neworder_dist=10 --payment_dist=15' % (id, cmd, 12*n)) 37 | 38 | for i in range(3): 39 | cmd = get_cmd(n, i) 40 | print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=Star --partition_num=%d --threads=12 --query=mixed --neworder_dist=10 --payment_dist=15 --log_path=/home/ubuntu/star.logging' % (id, cmd, 12*n)) 41 | 42 | 43 | -------------------------------------------------------------------------------- /common/Random.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 7/14/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace star { 10 | 11 | class Random { 12 | public: 13 | Random(uint64_t seed = 0) { init_seed(seed); } 14 | 15 | void init_seed(uint64_t seed) { 16 | seed_ = (seed ^ 0x5DEECE66DULL) & ((1ULL << 48) - 1); 17 | } 18 | 19 | void set_seed(uint64_t seed) { seed_ = seed; } 20 | 21 | uint64_t get_seed() { return seed_; } 22 | 23 | uint64_t next() { return ((uint64_t)next(32) << 32) + next(32); } 24 | 25 | uint64_t next(unsigned int bits) { 26 | seed_ = (seed_ * 0x5DEECE66DULL + 0xBULL) & ((1ULL << 48) - 1); 27 | return (seed_ >> (48 - bits)); 28 | } 29 | 30 | /* [0.0, 1.0) */ 31 | double next_double() { 32 | return (((uint64_t)next(26) << 27) + next(27)) / (double)(1ULL << 53); 33 | } 34 | 35 | uint64_t uniform_dist(uint64_t a, uint64_t b) { 36 | if (a == b) 37 | return a; 38 | return next() % (b - a + 1) + a; 39 | } 40 | 41 | std::string rand_str(std::size_t length, const std::string &str) { 42 | std::string result; 43 | auto str_len = str.length(); 44 | for (auto i = 0u; i < length; i++) { 45 | int k = uniform_dist(0, str_len - 1); 46 | result += str[k]; 47 | } 48 | return result; 49 | } 50 | 51 | std::string a_string(std::size_t min_len, std::size_t max_len) { 52 | auto len = uniform_dist(min_len, max_len); 53 | return rand_str(len, alpha()); 54 | } 55 | 56 | private: 57 | static const std::string &alpha() { 58 | static std::string alpha_ = 59 | "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; 60 | return alpha_; 61 | }; 62 | 63 | uint64_t seed_; 64 | }; 65 | } // namespace star 66 | -------------------------------------------------------------------------------- /scripts/aws_perf_comparison_pb.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | ips = [line.strip() for line in open("ips.txt", "r")][:2] 4 | n = len(ips) 5 | 6 | ins = [line.split("\t")[0] for line in ips] 7 | outs = [line.split("\t")[1] for line in ips] 8 | 9 | id = 
int(sys.argv[1]) 10 | port = int(sys.argv[2]) 11 | 12 | ratios = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] 13 | 14 | def get_cmd(n, i): 15 | cmd = "" 16 | for j in range(n): 17 | if j > 0: 18 | cmd += ";" 19 | if id == j: 20 | cmd += ins[j] + ":" + str(port+i) 21 | else: 22 | cmd += outs[j] + ":" + str(port+i) 23 | return cmd 24 | 25 | 26 | for ratio in ratios: 27 | for i in range(3): 28 | cmd = get_cmd(n, i) 29 | print('./bench_ycsb --logtostderr=1 --id=%d --servers="%s" --protocol=SiloGC --partition_num=%d --threads=12 --partitioner=pb --read_write_ratio=90 --cross_ratio=%d --batch_flush=200' % (id, cmd, 48, ratio)) 30 | 31 | for ratio in ratios: 32 | for i in range(3): 33 | cmd = get_cmd(n, i) 34 | print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=SiloGC --partition_num=%d --threads=12 --partitioner=pb --query=mixed --neworder_dist=%d --payment_dist=%d' % (id, cmd, 48, ratio, ratio)) 35 | 36 | 37 | #for ratio in ratios: 38 | # for i in range(3): 39 | # cmd = get_cmd(n, i) 40 | # print('./bench_ycsb --logtostderr=1 --id=%d --servers="%s" --protocol=Silo --partition_num=%d --threads=12 --partitioner=pb --read_write_ratio=90 --cross_ratio=%d' % (id, cmd, 48, ratio)) 41 | 42 | #for ratio in ratios: 43 | # for i in range(3): 44 | # cmd = get_cmd(n, i) 45 | # print('./bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=Silo --partition_num=%d --threads=12 --partitioner=pb --query=mixed --neworder_dist=%d --payment_dist=%d' % (id, cmd, 48, ratio, ratio)) 46 | -------------------------------------------------------------------------------- /core/Worker.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 7/22/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include "common/LockfreeQueue.h" 8 | #include "common/Message.h" 9 | #include 10 | #include 11 | #include 12 | 13 | namespace star { 14 | 15 | class Worker { 16 | public: 17 | Worker(std::size_t coordinator_id, std::size_t id) 18 | : coordinator_id(coordinator_id), id(id) { 19 | n_commit.store(0); 20 | n_abort_no_retry.store(0); 21 | n_abort_lock.store(0); 22 | n_abort_read_validation.store(0); 23 | n_local.store(0); 24 | n_si_in_serializable.store(0); 25 | n_network_size.store(0); 26 | } 27 | 28 | virtual ~Worker() = default; 29 | 30 | virtual void start() = 0; 31 | 32 | virtual void onExit() {} 33 | 34 | virtual void start_hstore_master() {} 35 | 36 | virtual void push_master_special_message(Message *message) { } 37 | 38 | virtual Message *pop_master_message() { return nullptr; } 39 | 40 | virtual void push_master_message(Message *message) { } 41 | 42 | virtual void push_message(Message *message) = 0; 43 | 44 | virtual void push_replica_message(Message *message) = 0; 45 | 46 | virtual Message *pop_message() = 0; 47 | 48 | public: 49 | std::size_t coordinator_id; 50 | std::size_t id; 51 | std::atomic n_commit, n_abort_no_retry, n_abort_lock, 52 | n_abort_read_validation, n_local, n_si_in_serializable, n_network_size; 53 | 54 | std::atomic n_failed_write_lock{0}, n_failed_read_lock{0}, n_failed_no_cmd{0}, n_failed_cmd_not_ready{0}; 55 | 56 | std::atomic last_window_persistence_latency{0}; 57 | std::atomic last_window_txn_latency{0}; 58 | std::atomic last_window_queued_lock_req_latency{0}; 59 | std::atomic last_window_lock_req_latency{0}; 60 | std::atomic last_window_active_txns{0}; 61 | }; 62 | 63 | } // namespace star 64 | -------------------------------------------------------------------------------- /common/Encoder.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 7/17/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | #include "Serialization.h" 11 | #include "StringPiece.h" 12 | 13 | namespace star { 14 | class Encoder { 15 | public: 16 | Encoder(std::string &bytes) : bytes(bytes) {} 17 | 18 | template friend Encoder &operator<<(Encoder &enc, const T &rhs); 19 | 20 | StringPiece toStringPiece() { 21 | return StringPiece(bytes.data(), bytes.size()); 22 | } 23 | 24 | void write_n_bytes(const void *ptr, std::size_t size) { 25 | bytes.append(static_cast(ptr), size); 26 | } 27 | 28 | std::size_t size() { return bytes.size(); } 29 | 30 | void replace_bytes_range(std::size_t offset, const void * ptr, std::size_t size) { 31 | memcpy(&bytes[0] + offset, ptr, size); 32 | } 33 | private: 34 | std::string &bytes; 35 | }; 36 | 37 | template Encoder &operator<<(Encoder &enc, const T &rhs) { 38 | Serializer serializer; 39 | enc.bytes += serializer(rhs); 40 | return enc; 41 | } 42 | 43 | class Decoder { 44 | public: 45 | Decoder(StringPiece bytes) : bytes(bytes) {} 46 | 47 | template friend Decoder &operator>>(Decoder &dec, T &rhs); 48 | 49 | void read_n_bytes(void *ptr, std::size_t size) { 50 | DCHECK(bytes.size() >= size); 51 | std::memcpy(ptr, bytes.data(), size); 52 | bytes.remove_prefix(size); 53 | } 54 | 55 | void remove_prefix(std::size_t size) { 56 | bytes.remove_prefix(size); 57 | } 58 | 59 | const char * get_raw_ptr() { return bytes.data(); } 60 | 61 | std::size_t size() { return bytes.size(); } 62 | 63 | StringPiece bytes; 64 | }; 65 | 66 | template Decoder &operator>>(Decoder &dec, T &rhs) { 67 | Deserializer deserializer; 68 | std::size_t size = deserializer(dec.bytes, rhs); 69 | dec.bytes.remove_prefix(size); 70 | return dec; 71 | } 72 | } // namespace star 73 | -------------------------------------------------------------------------------- /scripts/run_lotus_mp_ycsb.sh: -------------------------------------------------------------------------------- 1 | results_dir=~/star/exp_results/gc_lotus_mp_ycsb 2 | mkdir -p 
$results_dir 3 | cd $results_dir 4 | script_name="gc_lotus_mp_ycsb_run" 5 | bsize=400 6 | lotus_async_repl=true 7 | partition_num=180 8 | granule_count=1000 9 | cross_part_num=2 10 | logging_latency=0 11 | nohup gcloud compute ssh --zone us-central1-a node2 --command "cd star;logging_latency=$logging_latency cross_part_num=$cross_part_num lotus_async_repl=$lotus_async_repl granule_count=$granule_count partition_num=$partition_num bsize=$bsize ./$script_name.sh" > $script_name-2.log & 12 | nohup gcloud compute ssh --zone us-central1-a node3 --command "cd star;logging_latency=$logging_latency cross_part_num=$cross_part_num lotus_async_repl=$lotus_async_repl granule_count=$granule_count partition_num=$partition_num bsize=$bsize ./$script_name.sh" > $script_name-3.log & 13 | nohup gcloud compute ssh --zone us-central1-a node4 --command "cd star;logging_latency=$logging_latency cross_part_num=$cross_part_num lotus_async_repl=$lotus_async_repl granule_count=$granule_count partition_num=$partition_num bsize=$bsize ./$script_name.sh" > $script_name-4.log & 14 | nohup gcloud compute ssh --zone us-central1-a node5 --command "cd star;logging_latency=$logging_latency cross_part_num=$cross_part_num lotus_async_repl=$lotus_async_repl granule_count=$granule_count partition_num=$partition_num bsize=$bsize ./$script_name.sh" > $script_name-5.log & 15 | nohup gcloud compute ssh --zone us-central1-a node6 --command "cd star;logging_latency=$logging_latency cross_part_num=$cross_part_num lotus_async_repl=$lotus_async_repl granule_count=$granule_count partition_num=$partition_num bsize=$bsize ./$script_name.sh" > $script_name-6.log & 16 | 17 | log_path=$results_dir/$script_name-1.log 18 | cd ~/star 19 | cross_part_num=$cross_part_num logging_latency=$logging_latency lotus_async_repl=$lotus_async_repl granule_count=$granule_count partition_num=$partition_num bsize=$bsize ./$script_name.sh > $log_path 2>&1 20 | 
"""Print the benchmark command lines for one node of the scalability run.

Usage: python aws_scalability.py <node_id> <base_port>

Reads ``ips.txt`` from the current directory. Each non-empty line holds two
whitespace-separated addresses for one host: the address the host uses for
itself (first column) and the address its peers use to reach it (second
column). One command is printed per (cluster size, protocol, benchmark,
repetition); nothing is executed.
"""
import sys

# Other runs used ["SiloGC", "TwoPLGC"] or ["Calvin"]. The Calvin variant of
# the commands additionally passed --batch_size=10000 --replica_group=<n>
# and --lock_manager=2 (YCSB) / --lock_manager=4 (TPC-C).
PROTOCOLS = ["Star"]
CLUSTER_SIZES = [8, 7, 6, 5, 4, 3, 2]  # must stay in descending order
RUNS_PER_CONFIG = 3
THREADS_PER_NODE = 12

YCSB_TEMPLATE = (
    './bench_ycsb --logtostderr=1 --id=%d --servers="%s" --protocol=%s '
    '--partition_num=%d --partitioner=hash2 --threads=12 '
    '--read_write_ratio=90 --cross_ratio=10 --batch_size=1000 '
    '--batch_flush=200'
)
TPCC_TEMPLATE = (
    './bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=%s '
    '--partition_num=%d --partitioner=hash2 --threads=12 --query=mixed '
    '--neworder_dist=10 --payment_dist=15 --batch_size=1000'
)


def parse_hosts(lines):
    """Split host lines into (own_addresses, peer_addresses).

    Blank lines are skipped. Columns may be separated by tabs or spaces.

    Raises:
        ValueError: if a non-empty line has fewer than two columns.
    """
    own, peer = [], []
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        if len(fields) < 2:
            raise ValueError("expected two addresses per line, got %r" % line)
        own.append(fields[0])
        peer.append(fields[1])
    return own, peer


def build_servers(node_id, own, peer, port):
    """Return the ';'-joined ``host:port`` list as seen from ``node_id``.

    The entry for ``node_id`` itself uses its own-address column; every
    other entry uses the peer-address column.
    """
    return ";".join(
        "%s:%d" % (own[j] if j == node_id else peer[j], port)
        for j in range(len(own))
    )


def generate_commands(node_id, port, own, peer):
    """Yield every command line this node has to run, in execution order.

    Cluster sizes that do not include ``node_id`` end the iteration (sizes
    are descending, so no later size can include it either). Sizes larger
    than the number of known hosts are skipped instead of failing with an
    IndexError; every node reads the same host file, so all nodes skip the
    same sizes.
    """
    for n in CLUSTER_SIZES:
        if node_id >= n:
            break
        if n > len(own):
            continue
        for protocol in PROTOCOLS:
            for template in (YCSB_TEMPLATE, TPCC_TEMPLATE):
                # Each repetition gets its own port so that back-to-back
                # runs do not collide on sockets that are still closing.
                for i in range(RUNS_PER_CONFIG):
                    servers = build_servers(node_id, own[:n], peer[:n],
                                            port + i)
                    yield template % (node_id, servers, protocol,
                                      THREADS_PER_NODE * n)


def main(argv=None):
    """Parse ``<node_id> <base_port>`` and print the generated commands."""
    args = sys.argv if argv is None else argv
    node_id = int(args[1])
    port = int(args[2])
    with open("ips.txt", "r") as hosts_file:
        own, peer = parse_hosts(hosts_file)
    for command in generate_commands(node_id, port, own, peer):
        print(command)


if __name__ == "__main__":
    main()
"""Print the benchmark command lines for one node of the latency run.

Usage: python aws_latency.py <node_id> <base_port>

Reads ``ips.txt`` from the current directory. Each non-empty line holds two
whitespace-separated addresses for one host: the address the host uses for
itself (first column) and the address its peers use to reach it (second
column). Commands are only printed, never executed.
"""
import sys

PROTOCOLS = ["Silo", "TwoPL"]
RATIOS = [10, 50, 90]
# These protocols are run once each, with the last (highest) ratio.
GROUP_COMMIT_PROTOCOLS = ["Star", "SiloGC", "TwoPLGC"]
THREADS_PER_NODE = 12

YCSB_TEMPLATE = (
    './bench_ycsb --logtostderr=1 --id=%d --servers="%s" --protocol=%s '
    '--partition_num=%d --threads=12 --partitioner=hash2 '
    '--read_write_ratio=90 --cross_ratio=%d --batch_flush=20'
)
TPCC_TEMPLATE = (
    './bench_tpcc --logtostderr=1 --id=%d --servers="%s" --protocol=%s '
    '--partition_num=%d --threads=12 --partitioner=hash2 --query=mixed '
    '--neworder_dist=%d --payment_dist=%d --batch_flush=20'
)


def parse_hosts(lines):
    """Split host lines into (own_addresses, peer_addresses).

    Blank lines are skipped. Columns may be separated by tabs or spaces.

    Raises:
        ValueError: if a non-empty line has fewer than two columns.
    """
    ins, outs = [], []
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        if len(fields) < 2:
            raise ValueError("expected two addresses per line, got %r" % line)
        ins.append(fields[0])
        outs.append(fields[1])
    return ins, outs


def get_cmd(n, i, node_id, ins, outs, port):
    """Return the ';'-joined server list for the first ``n`` hosts.

    Every entry uses port ``port + i``. The entry for ``node_id`` uses its
    own-address column (``ins``); all others use the peer column (``outs``).
    """
    return ";".join(
        "%s:%d" % (ins[j] if j == node_id else outs[j], port + i)
        for j in range(n)
    )


def generate_commands(node_id, port, ins, outs):
    """Yield every command line for this node, in execution order."""
    n = len(ins)
    partitions = THREADS_PER_NODE * n

    for protocol in PROTOCOLS:
        for i, ratio in enumerate(RATIOS):
            servers = get_cmd(n, i, node_id, ins, outs, port)
            yield YCSB_TEMPLATE % (node_id, servers, protocol, partitions,
                                   ratio)

    for protocol in PROTOCOLS:
        for i, ratio in enumerate(RATIOS):
            servers = get_cmd(n, i, node_id, ins, outs, port)
            yield TPCC_TEMPLATE % (node_id, servers, protocol, partitions,
                                   ratio, ratio)

    # The original script reused whatever value the loop variable `ratio`
    # was left holding, i.e. the last entry of RATIOS. State it explicitly.
    final_ratio = RATIOS[-1]
    for i, protocol in enumerate(GROUP_COMMIT_PROTOCOLS):
        servers = get_cmd(n, i, node_id, ins, outs, port)
        yield YCSB_TEMPLATE % (node_id, servers, protocol, partitions,
                               final_ratio)


def main(argv=None):
    """Parse ``<node_id> <base_port>`` and print the generated commands."""
    args = sys.argv if argv is None else argv
    node_id = int(args[1])
    port = int(args[2])
    with open("ips.txt", "r") as hosts_file:
        ins, outs = parse_hosts(hosts_file)
    for command in generate_commands(node_id, port, ins, outs):
        print(command)


if __name__ == "__main__":
    main()
(partitioner.has_master_partition(partitionId)) { 45 | auto key = writeKey.get_key(); 46 | auto value = writeKey.get_value(); 47 | table->update(key, value); 48 | } else { 49 | auto coordinatorID = partitioner.master_coordinator(partitionId); 50 | txn.network_size += MessageFactoryType::new_write_message( 51 | *messages[coordinatorID], *table, writeKey.get_key(), 52 | writeKey.get_value()); 53 | } 54 | } 55 | 56 | return true; 57 | } 58 | 59 | private: 60 | DatabaseType &db; 61 | const ContextType &context; 62 | Partitioner &partitioner; 63 | }; 64 | } // namespace aria -------------------------------------------------------------------------------- /common/BufferedFileWriter.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 3/21/19. 3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | class BufferedFileWriter { 15 | 16 | public: 17 | BufferedFileWriter(const char *filename, std::size_t emulated_persist_latency = 0) 18 | : emulated_persist_latency(emulated_persist_latency) { 19 | fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 20 | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); 21 | CHECK(fd >= 0); 22 | bytes_total = 0; 23 | } 24 | 25 | void write(const char *str, long size) { 26 | 27 | if (bytes_total + size < BUFFER_SIZE) { 28 | memcpy(buffer + bytes_total, str, size); 29 | bytes_total += size; 30 | return; 31 | } 32 | 33 | auto copy_size = BUFFER_SIZE - bytes_total; 34 | 35 | memcpy(buffer + bytes_total, str, copy_size); 36 | bytes_total += copy_size; 37 | flush(); 38 | 39 | str += copy_size; 40 | size -= copy_size; 41 | 42 | if (size >= BUFFER_SIZE) { 43 | int err = ::write(fd, str, size); 44 | CHECK(err >= 0); 45 | bytes_total = 0; 46 | } else { 47 | memcpy(buffer, str, size); 48 | bytes_total += size; 49 | } 50 | } 51 | 52 | void flush() { 53 | DCHECK(fd >= 0); 54 | if (bytes_total > 0) { 55 | int err = 
::write(fd, buffer, bytes_total); 56 | CHECK(err >= 0); 57 | } 58 | bytes_total = 0; 59 | } 60 | 61 | void sync() { 62 | flush(); 63 | DCHECK(fd >= 0); 64 | int err = 0; 65 | if (emulated_persist_latency != 0) { 66 | std::this_thread::sleep_for(std::chrono::microseconds(emulated_persist_latency)); 67 | } else { 68 | err = fdatasync(fd); 69 | } 70 | CHECK(err == 0); 71 | } 72 | 73 | void close() { 74 | flush(); 75 | int err = ::close(fd); 76 | CHECK(err == 0); 77 | } 78 | 79 | public: 80 | static constexpr uint32_t BUFFER_SIZE = 1024 * 1024 * 4; // 4MB 81 | 82 | private: 83 | int fd; 84 | char buffer[BUFFER_SIZE]; 85 | std::size_t bytes_total; 86 | std::size_t emulated_persist_latency; 87 | }; -------------------------------------------------------------------------------- /core/group_commit/Manager.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 9/10/18. 3 | // 4 | 5 | #pragma once 6 | 7 | #include "core/Manager.h" 8 | 9 | namespace star { 10 | namespace group_commit { 11 | 12 | class Manager : public star::Manager { 13 | public: 14 | using base_type = star::Manager; 15 | 16 | Manager(std::size_t coordinator_id, std::size_t id, const Context &context, 17 | std::atomic &stopFlag) 18 | : base_type(coordinator_id, id, context, stopFlag) {} 19 | 20 | void coordinator_start() override { 21 | 22 | std::size_t n_workers = context.worker_num; 23 | std::size_t n_coordinators = context.coordinator_num; 24 | 25 | while (!stopFlag.load()) { 26 | 27 | n_started_workers.store(0); 28 | n_completed_workers.store(0); 29 | signal_worker(ExecutorStatus::START); 30 | wait_all_workers_start(); 31 | std::this_thread::sleep_for( 32 | std::chrono::milliseconds(context.group_time)); 33 | set_worker_status(ExecutorStatus::STOP); 34 | wait_all_workers_finish(); 35 | broadcast_stop(); 36 | wait4_stop(n_coordinators - 1); 37 | // process replication 38 | n_completed_workers.store(0); 39 | 
set_worker_status(ExecutorStatus::CLEANUP); 40 | wait_all_workers_finish(); 41 | wait4_ack(); 42 | } 43 | 44 | signal_worker(ExecutorStatus::EXIT); 45 | } 46 | 47 | void non_coordinator_start() override { 48 | 49 | std::size_t n_workers = context.worker_num; 50 | std::size_t n_coordinators = context.coordinator_num; 51 | 52 | for (;;) { 53 | 54 | ExecutorStatus status = wait4_signal(); 55 | if (status == ExecutorStatus::EXIT) { 56 | set_worker_status(ExecutorStatus::EXIT); 57 | break; 58 | } 59 | 60 | DCHECK(status == ExecutorStatus::START); 61 | n_completed_workers.store(0); 62 | n_started_workers.store(0); 63 | set_worker_status(ExecutorStatus::START); 64 | wait_all_workers_start(); 65 | wait4_stop(1); 66 | set_worker_status(ExecutorStatus::STOP); 67 | wait_all_workers_finish(); 68 | broadcast_stop(); 69 | wait4_stop(n_coordinators - 2); 70 | // process replication 71 | n_completed_workers.store(0); 72 | set_worker_status(ExecutorStatus::CLEANUP); 73 | wait_all_workers_finish(); 74 | send_ack(); 75 | } 76 | } 77 | }; 78 | 79 | } // namespace group_commit 80 | } // namespace star 81 | -------------------------------------------------------------------------------- /protocol/Silo/SiloHelper.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 9/3/18. 3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | #include "glog/logging.h" 10 | 11 | namespace star { 12 | 13 | class SiloHelper { 14 | 15 | public: 16 | using MetaDataType = std::atomic; 17 | 18 | static uint64_t read(const std::tuple &row, 19 | void *dest, std::size_t size) { 20 | 21 | MetaDataType &tid = *std::get<0>(row); 22 | void *src = std::get<1>(row); 23 | 24 | // read from a consistent view. read the value even it's locked by others. 
25 | // abort in read validation phase 26 | uint64_t tid_; 27 | do { 28 | tid_ = tid.load(); 29 | std::memcpy(dest, src, size); 30 | } while (tid_ != tid.load()); 31 | 32 | return remove_lock_bit(tid_); 33 | } 34 | 35 | static bool is_locked(uint64_t value) { 36 | return (value >> LOCK_BIT_OFFSET) & LOCK_BIT_MASK; 37 | } 38 | 39 | static uint64_t lock(std::atomic &a) { 40 | uint64_t oldValue, newValue; 41 | do { 42 | do { 43 | oldValue = a.load(); 44 | } while (is_locked(oldValue)); 45 | newValue = (LOCK_BIT_MASK << LOCK_BIT_OFFSET) | oldValue; 46 | } while (!a.compare_exchange_weak(oldValue, newValue)); 47 | DCHECK(is_locked(oldValue) == false); 48 | return oldValue; 49 | } 50 | 51 | static uint64_t lock(std::atomic &a, bool &success) { 52 | uint64_t oldValue = a.load(); 53 | 54 | if (is_locked(oldValue)) { 55 | success = false; 56 | } else { 57 | uint64_t newValue = (LOCK_BIT_MASK << LOCK_BIT_OFFSET) | oldValue; 58 | success = a.compare_exchange_strong(oldValue, newValue); 59 | } 60 | return oldValue; 61 | } 62 | 63 | static void unlock(std::atomic &a) { 64 | uint64_t oldValue = a.load(); 65 | DCHECK(is_locked(oldValue)); 66 | uint64_t newValue = remove_lock_bit(oldValue); 67 | bool ok = a.compare_exchange_strong(oldValue, newValue); 68 | DCHECK(ok); 69 | } 70 | 71 | static void unlock(std::atomic &a, uint64_t newValue) { 72 | uint64_t oldValue = a.load(); 73 | DCHECK(is_locked(oldValue)); 74 | DCHECK(is_locked(newValue) == false); 75 | bool ok = a.compare_exchange_strong(oldValue, newValue); 76 | DCHECK(ok); 77 | } 78 | 79 | static uint64_t remove_lock_bit(uint64_t value) { 80 | return value & ~(LOCK_BIT_MASK << LOCK_BIT_OFFSET); 81 | } 82 | 83 | public: 84 | static constexpr int LOCK_BIT_OFFSET = 63; 85 | static constexpr uint64_t LOCK_BIT_MASK = 0x1ull; 86 | }; 87 | 88 | } // namespace star -------------------------------------------------------------------------------- /common/Percentile.h: 
-------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 8/29/18. 3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "Random.h" 13 | // The nearest-rank method 14 | // https://en.wikipedia.org/wiki/Percentile 15 | 16 | namespace star { 17 | extern bool warmed_up; 18 | template class Percentile { 19 | public: 20 | Percentile() : rand((uint64_t)this){} 21 | using element_type = T; 22 | void add(const element_type &value) { 23 | if (warmed_up == false || rand.uniform_dist(0, 100) > 10) // record 2% of the data 24 | return; 25 | isSorted_ = false; 26 | data_.push_back(value); 27 | sum += value; 28 | } 29 | 30 | void add(const std::vector &v) { 31 | isSorted_ = false; 32 | std::copy(v.begin(), v.end(), std::back_inserter(data_)); 33 | } 34 | 35 | void clear() { 36 | isSorted_ = true; 37 | data_.clear(); 38 | } 39 | 40 | auto size() { return data_.size(); } 41 | 42 | element_type avg() { 43 | return sum / (size() + 0.1); 44 | } 45 | 46 | element_type nth(double n) { 47 | if (data_.size() == 0) { 48 | return 0; 49 | } 50 | checkSort(); 51 | DCHECK(n > 0 && n <= 100); 52 | auto sz = size(); 53 | auto i = static_cast(ceil(n / 100 * sz)) - 1; 54 | DCHECK(i >= 0 && i < size()); 55 | return data_[i]; 56 | } 57 | 58 | void save_cdf(const std::string &path) { 59 | if (data_.size() == 0) { 60 | return; 61 | } 62 | checkSort(); 63 | 64 | if (path.empty()) { 65 | return; 66 | } 67 | 68 | std::ofstream cdf; 69 | cdf.open(path); 70 | 71 | cdf << "value\tcdf" << std::endl; 72 | 73 | // output ~ 1k rows 74 | auto step_size = std::max(1, int(data_.size() * 0.99 / 1000)); 75 | 76 | std::vector cdf_result; 77 | 78 | for (auto i = 0u; i < 0.99 * data_.size(); i += step_size) { 79 | cdf_result.push_back(data_[i]); 80 | } 81 | 82 | for (auto i = 0u; i < cdf_result.size(); i++) { 83 | cdf << cdf_result[i] << "\t" << 1.0 * (i + 1) / cdf_result.size() 84 | << std::endl; 85 | } 86 | 87 
| cdf.close(); 88 | } 89 | 90 | private: 91 | void checkSort() { 92 | if (!isSorted_) { 93 | std::sort(data_.begin(), data_.end()); 94 | isSorted_ = true; 95 | } 96 | } 97 | 98 | private: 99 | Random rand; 100 | bool isSorted_ = true; 101 | std::vector data_; 102 | element_type sum = 0; 103 | }; 104 | } // namespace star -------------------------------------------------------------------------------- /protocol/Calvin/Calvin.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 9/14/18. 3 | // 4 | 5 | #pragma once 6 | 7 | #include "core/Partitioner.h" 8 | #include "core/Table.h" 9 | #include "protocol/Calvin/CalvinHelper.h" 10 | #include "protocol/Calvin/CalvinMessage.h" 11 | #include "protocol/Calvin/CalvinTransaction.h" 12 | 13 | namespace star { 14 | 15 | template class Calvin { 16 | public: 17 | using DatabaseType = Database; 18 | using MetaDataType = std::atomic; 19 | using ContextType = typename DatabaseType::ContextType; 20 | using MessageType = CalvinMessage; 21 | using TransactionType = CalvinTransaction; 22 | 23 | using MessageFactoryType = CalvinMessageFactory; 24 | using MessageHandlerType = CalvinMessageHandler; 25 | 26 | Calvin(DatabaseType &db, CalvinPartitioner &partitioner) 27 | : db(db), partitioner(partitioner) {} 28 | 29 | void abort(std::vector> & messages, TransactionType &txn, std::size_t lock_manager_id, 30 | std::size_t n_lock_manager, std::size_t replica_group_size) { 31 | } 32 | 33 | bool commit(std::vector> & messages, TransactionType &txn, std::size_t lock_manager_id, 34 | std::size_t n_lock_manager, std::size_t replica_group_size) { 35 | ScopedTimer t([&, this](uint64_t us) { 36 | txn.record_commit_write_back_time(us); 37 | }); 38 | // write to db 39 | write(messages, txn, lock_manager_id, n_lock_manager, replica_group_size); 40 | return true; 41 | } 42 | 43 | void write(std::vector> & messages, TransactionType &txn, std::size_t lock_manager_id, 44 | std::size_t 
n_lock_manager, std::size_t replica_group_size) { 45 | 46 | auto &writeSet = txn.writeSet; 47 | for (auto i = 0u; i < writeSet.size(); i++) { 48 | auto &writeKey = writeSet[i]; 49 | auto tableId = writeKey.get_table_id(); 50 | auto partitionId = writeKey.get_partition_id(); 51 | auto table = db.find_table(tableId, partitionId); 52 | 53 | if (partitioner.has_master_partition(partitionId)) { 54 | auto key = writeKey.get_key(); 55 | auto value = writeKey.get_value(); 56 | table->update(key, value); 57 | } else { 58 | auto coordinator_id = partitioner.master_coordinator(partitionId); 59 | messages[coordinator_id]->set_transaction_id(txn.transaction_id); 60 | auto sz = MessageFactoryType::new_write_message(*messages[coordinator_id], *table, writeKey.get_key(), writeKey.get_value()); 61 | txn.network_size.fetch_add(sz); 62 | txn.remote_write++; 63 | } 64 | } 65 | } 66 | 67 | private: 68 | DatabaseType &db; 69 | CalvinPartitioner &partitioner; 70 | }; 71 | } // namespace star -------------------------------------------------------------------------------- /core/Context.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 7/19/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "common/WALLogger.h" 12 | 13 | namespace star { 14 | class Context { 15 | 16 | public: 17 | void set_star_partitioner() { 18 | if (protocol != "Star") { 19 | return; 20 | } 21 | if (coordinator_id == 0) { 22 | partitioner = "StarS"; 23 | } else { 24 | partitioner = "StarC"; 25 | } 26 | } 27 | 28 | public: 29 | std::size_t coordinator_id = 0; 30 | std::size_t partition_num = 0; 31 | std::size_t worker_num = 0; 32 | std::size_t coordinator_num = 0; 33 | std::size_t io_thread_num = 1; 34 | std::string protocol; 35 | std::string replica_group; 36 | std::string lock_manager; 37 | std::size_t batch_size = 240; // star, calvin, dbx batch size 38 | std::size_t batch_flush = 10; 39 | std::size_t group_time = 40; // ms 40 | std::size_t sleep_time = 50; // us 41 | std::string partitioner; 42 | std::size_t delay_time = 0; 43 | std::size_t wal_group_commit_time = 10;// us 44 | std::string log_path; 45 | std::string cdf_path; 46 | std::size_t cpu_core_id = 0; 47 | std::size_t cross_txn_workers = 0; 48 | bool hstore_command_logging = true; 49 | star::WALLogger * logger = nullptr; 50 | std::size_t group_commit_batch_size = 7; 51 | // https://www.storagereview.com/review/intel-ssd-dc-p4510-review 52 | // We emulate 110us write latency of Intel DC P4510 SSD. 
53 | std::size_t emulated_persist_latency = 110; 54 | 55 | bool enable_hstore_master = false; 56 | 57 | bool tcp_no_delay = true; 58 | bool tcp_quick_ack = false; 59 | 60 | bool cpu_affinity = false; 61 | 62 | bool sleep_on_retry = true; 63 | 64 | bool read_on_replica = false; 65 | bool local_validation = false; 66 | bool rts_sync = false; 67 | bool star_sync_in_single_master_phase = false; 68 | bool star_dynamic_batch_size = true; 69 | bool parallel_locking_and_validation = true; 70 | 71 | bool calvin_same_batch = false; 72 | 73 | bool kiva_read_only_optmization = true; 74 | bool kiva_reordering_optmization = true; 75 | bool kiva_snapshot_isolation = false; 76 | bool operation_replication = false; 77 | 78 | bool aria_read_only_optmization = true; 79 | bool aria_reordering_optmization = false; 80 | bool aria_snapshot_isolation = false; 81 | 82 | std::vector peers; 83 | int stragglers_per_batch = 0; 84 | int stragglers_total_wait_time = 20000; 85 | int stragglers_partition = -1; 86 | int sender_group_nop_count = 40000; 87 | double straggler_zipf_factor = 0; 88 | std::size_t straggler_num_txn_len = 10; 89 | std::size_t granules_per_partition = 128; 90 | bool lotus_async_repl = false; 91 | int lotus_checkpoint = 0; 92 | std::string lotus_checkpoint_location; 93 | bool hstore_active_active = false; 94 | bool lotus_sp_parallel_exec_commit = false; 95 | }; 96 | } // namespace star 97 | -------------------------------------------------------------------------------- /core/ControlMessage.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 9/6/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include "common/Encoder.h" 8 | #include "common/Message.h" 9 | #include "common/MessagePiece.h" 10 | 11 | namespace star { 12 | 13 | enum class ControlMessage { STATISTICS, SIGNAL, ACK, STOP, NFIELDS }; 14 | 15 | class ControlMessageFactory { 16 | 17 | public: 18 | static std::size_t new_statistics_message(Message &message, int coordinator_id, double value) { 19 | /* 20 | * The structure of a statistics message: (statistics value : double) 21 | * 22 | */ 23 | 24 | // the message is not associated with a table or a partition, use 0. 25 | auto message_size = MessagePiece::get_header_size() + sizeof(double) + sizeof(coordinator_id); 26 | auto message_piece_header = MessagePiece::construct_message_piece_header( 27 | static_cast(ControlMessage::STATISTICS), message_size, 0, 0); 28 | 29 | Encoder encoder(message.data); 30 | encoder << message_piece_header; 31 | encoder << coordinator_id << value; 32 | message.flush(); 33 | message.set_gen_time(Time::now()); 34 | return message_size; 35 | } 36 | 37 | static std::size_t new_signal_message(Message &message, uint32_t value) { 38 | 39 | /* 40 | * The structure of a signal message: (signal value : uint32_t) 41 | */ 42 | 43 | // the message is not associated with a table or a partition, use 0. 
44 | auto message_size = MessagePiece::get_header_size() + sizeof(uint32_t); 45 | auto message_piece_header = MessagePiece::construct_message_piece_header( 46 | static_cast(ControlMessage::SIGNAL), message_size, 0, 0); 47 | 48 | Encoder encoder(message.data); 49 | encoder << message_piece_header; 50 | encoder << value; 51 | message.flush(); 52 | message.set_gen_time(Time::now()); 53 | return message_size; 54 | } 55 | 56 | static std::size_t new_ack_message(Message &message) { 57 | /* 58 | * The structure of an ack message: () 59 | */ 60 | 61 | auto message_size = MessagePiece::get_header_size(); 62 | auto message_piece_header = MessagePiece::construct_message_piece_header( 63 | static_cast(ControlMessage::ACK), message_size, 0, 0); 64 | Encoder encoder(message.data); 65 | encoder << message_piece_header; 66 | message.flush(); 67 | message.set_gen_time(Time::now()); 68 | return message_size; 69 | } 70 | 71 | static std::size_t new_stop_message(Message &message) { 72 | /* 73 | * The structure of a stop message: () 74 | */ 75 | 76 | auto message_size = MessagePiece::get_header_size(); 77 | auto message_piece_header = MessagePiece::construct_message_piece_header( 78 | static_cast(ControlMessage::STOP), message_size, 0, 0); 79 | Encoder encoder(message.data); 80 | encoder << message_piece_header; 81 | message.flush(); 82 | message.set_gen_time(Time::now()); 83 | return message_size; 84 | } 85 | }; 86 | 87 | } // namespace star 88 | -------------------------------------------------------------------------------- /protocol/Star/StarQueryNum.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 9/21/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include "benchmark/tpcc/Context.h" 8 | #include "benchmark/ycsb/Context.h" 9 | 10 | namespace star { 11 | template class StarQueryNum { 12 | 13 | public: 14 | static std::size_t get_s_phase_query_num(const Context &context, 15 | uint32_t batch_size) { 16 | CHECK(false) << "not supported."; 17 | return 0; 18 | } 19 | 20 | static std::size_t get_c_phase_query_num(const Context &context, 21 | uint32_t batch_size) { 22 | CHECK(false) << "not supported."; 23 | return 0; 24 | } 25 | }; 26 | 27 | template <> class StarQueryNum { 28 | public: 29 | static std::size_t get_s_phase_query_num(const star::tpcc::Context &context, 30 | uint32_t batch_size) { 31 | if (context.workloadType == star::tpcc::TPCCWorkloadType::NEW_ORDER_ONLY) { 32 | return batch_size * (100 - context.newOrderCrossPartitionProbability) / 33 | 100; 34 | } else if (context.workloadType == 35 | star::tpcc::TPCCWorkloadType::PAYMENT_ONLY) { 36 | return batch_size * (100 - context.paymentCrossPartitionProbability) / 37 | 100; 38 | } else { 39 | return (batch_size * (100 - context.newOrderCrossPartitionProbability) / 40 | 100 + 41 | batch_size * (100 - context.paymentCrossPartitionProbability) / 42 | 100) / 43 | 2; 44 | } 45 | } 46 | 47 | static std::size_t get_c_phase_query_num(const star::tpcc::Context &context, 48 | uint32_t batch_size) { 49 | if (context.workloadType == star::tpcc::TPCCWorkloadType::NEW_ORDER_ONLY) { 50 | return context.coordinator_num * batch_size * 51 | context.newOrderCrossPartitionProbability / 100; 52 | } else if (context.workloadType == 53 | star::tpcc::TPCCWorkloadType::PAYMENT_ONLY) { 54 | return context.coordinator_num * batch_size * 55 | context.paymentCrossPartitionProbability / 100; 56 | } else { 57 | return context.coordinator_num * 58 | (batch_size * context.newOrderCrossPartitionProbability / 100 + 59 | batch_size * context.paymentCrossPartitionProbability / 100) / 60 | 2; 61 | } 62 | } 63 | }; 64 | 65 | template <> class StarQueryNum { 66 
| public: 67 | static std::size_t get_s_phase_query_num(const star::ycsb::Context &context, 68 | uint32_t batch_size) { 69 | return batch_size * (100 - context.crossPartitionProbability) / 100.0; 70 | } 71 | 72 | static std::size_t get_c_phase_query_num(const star::ycsb::Context &context, 73 | uint32_t batch_size) { 74 | return context.coordinator_num * batch_size * 75 | context.crossPartitionProbability / 100.0; 76 | } 77 | }; 78 | } // namespace star 79 | -------------------------------------------------------------------------------- /common/ThreadPool.h: -------------------------------------------------------------------------------- 1 | // From https://github.com/progschj/ThreadPool/blob/master/ThreadPool.h 2 | #ifndef THREAD_POOL_H 3 | #define THREAD_POOL_H 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | class ThreadPool { 16 | public: 17 | ThreadPool(size_t); 18 | template 19 | auto enqueue(F&& f, Args&&... args) 20 | -> std::future::type>; 21 | ~ThreadPool(); 22 | private: 23 | // need to keep track of threads so we can join them 24 | std::vector< std::thread > workers; 25 | // the task queue 26 | std::queue< std::function > tasks; 27 | 28 | // synchronization 29 | std::mutex queue_mutex; 30 | std::condition_variable condition; 31 | bool stop; 32 | }; 33 | 34 | // the constructor just launches some amount of workers 35 | inline ThreadPool::ThreadPool(size_t threads) 36 | : stop(false) 37 | { 38 | for(size_t i = 0;i task; 45 | 46 | { 47 | std::unique_lock lock(this->queue_mutex); 48 | this->condition.wait(lock, 49 | [this]{ return this->stop || !this->tasks.empty(); }); 50 | if(this->stop && this->tasks.empty()) 51 | return; 52 | task = std::move(this->tasks.front()); 53 | this->tasks.pop(); 54 | } 55 | 56 | task(); 57 | } 58 | } 59 | ); 60 | } 61 | 62 | // add new work item to the pool 63 | template 64 | auto ThreadPool::enqueue(F&& f, Args&&... 
args) 65 | -> std::future::type> 66 | { 67 | using return_type = typename std::result_of::type; 68 | 69 | auto task = std::make_shared< std::packaged_task >( 70 | std::bind(std::forward(f), std::forward(args)...) 71 | ); 72 | 73 | std::future res = task->get_future(); 74 | { 75 | std::unique_lock lock(queue_mutex); 76 | 77 | // don't allow enqueueing after stopping the pool 78 | if(stop) 79 | throw std::runtime_error("enqueue on stopped ThreadPool"); 80 | 81 | tasks.emplace([task](){ (*task)(); }); 82 | } 83 | condition.notify_one(); 84 | return res; 85 | } 86 | 87 | // the destructor joins all threads 88 | inline ThreadPool::~ThreadPool() 89 | { 90 | { 91 | std::unique_lock lock(queue_mutex); 92 | stop = true; 93 | } 94 | condition.notify_all(); 95 | for(std::thread &worker: workers) 96 | worker.join(); 97 | } 98 | 99 | #endif -------------------------------------------------------------------------------- /bench_tpcc.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmark/tpcc/Database.h" 2 | #include "core/Coordinator.h" 3 | #include "core/Macros.h" 4 | 5 | DEFINE_bool(operation_replication, false, "use operation replication"); 6 | DEFINE_string(query, "neworder", "tpcc query, mixed, neworder, payment"); 7 | DEFINE_int32(neworder_dist, 10, "new order distributed."); 8 | DEFINE_int32(payment_dist, 15, "payment distributed."); 9 | 10 | // ./main --logtostderr=1 --id=1 --servers="127.0.0.1:10010;127.0.0.1:10011" 11 | // cmake -DCMAKE_BUILD_TYPE=Release 12 | bool do_tid_check = false; 13 | 14 | int main(int argc, char *argv[]) { 15 | star::tpcc::Random r; 16 | std::vector cnt(100, 0); 17 | for (size_t i = 0; i < 1000000; ++i) { 18 | auto x = r.non_uniform_distribution(8191, 1, 100000) % 1500; 19 | if (x < cnt.size()) { 20 | cnt[x]++; 21 | } 22 | } 23 | // for (size_t i = 1; i < cnt.size(); ++i) { 24 | // LOG(INFO) << "i " << i << " " << cnt[i]; 25 | // } 26 | google::InitGoogleLogging(argv[0]); 27 | 
google::InstallFailureSignalHandler(); 28 | google::ParseCommandLineFlags(&argc, &argv, true); 29 | 30 | star::tpcc::Context context; 31 | SETUP_CONTEXT(context); 32 | 33 | context.operation_replication = FLAGS_operation_replication; 34 | 35 | context.granules_per_partition = FLAGS_granule_count; 36 | 37 | if (FLAGS_query == "mixed") { 38 | context.workloadType = star::tpcc::TPCCWorkloadType::MIXED; 39 | } else if (FLAGS_query == "neworder") { 40 | context.workloadType = star::tpcc::TPCCWorkloadType::NEW_ORDER_ONLY; 41 | } else if (FLAGS_query == "payment") { 42 | context.workloadType = star::tpcc::TPCCWorkloadType::PAYMENT_ONLY; 43 | } else { 44 | CHECK(false); 45 | } 46 | 47 | context.newOrderCrossPartitionProbability = FLAGS_neworder_dist; 48 | context.paymentCrossPartitionProbability = FLAGS_payment_dist; 49 | 50 | if (context.log_path != "" && context.wal_group_commit_time != 0) { 51 | std::string redo_filename = 52 | context.log_path + "_group_commit.txt"; 53 | std::string logger_type = "GroupCommit Logger"; 54 | if (context.lotus_checkpoint == LotusCheckpointScheme::COW_ON_CHECKPOINT_ON_LOGGING_OFF) { // logging off so that logging and checkpoint threads will not compete for bandwidth 55 | logger_type = "Blackhole Logger"; 56 | context.logger = new star::BlackholeLogger(redo_filename, context.emulated_persist_latency); 57 | } else { 58 | context.logger = new star::GroupCommitLogger(redo_filename, context.group_commit_batch_size, context.wal_group_commit_time, context.emulated_persist_latency); 59 | } 60 | LOG(INFO) << "WAL Group Commiting to file [" << redo_filename << "]" << " using " << logger_type; 61 | } else { 62 | std::string redo_filename = 63 | context.log_path + "_non_group_commit.txt"; 64 | context.logger = new star::SimpleWALLogger(redo_filename, context.emulated_persist_latency); 65 | LOG(INFO) << "WAL Group Commiting off"; 66 | } 67 | star::tpcc::Database db; 68 | db.initialize(context); 69 | 70 | do_tid_check = false; 71 | star::Coordinator 
c(FLAGS_id, db, context); 72 | c.connectToPeers(); 73 | c.start(); 74 | return 0; 75 | }
--------------------------------------------------------------------------------
/protocol/Silo/SiloExecutor.h:
--------------------------------------------------------------------------------
//
// Created by Yi Lu on 9/11/18.
//

#pragma once

#include "core/Executor.h"
#include "protocol/Silo/Silo.h"

namespace star {
/*
 * Executor bound to the Silo protocol.
 *
 * Everything except setupHandlers() is inherited from the Executor base
 * (base_type); this class only decides how a transaction's callbacks are
 * wired to this executor's protocol, partitioner, database and message
 * buffers.
 */
template
class SiloExecutor
    : public Executor>

{
public:
  using base_type = Executor>;

  using WorkloadType = Workload;
  using ProtocolType = Silo;
  using DatabaseType = typename WorkloadType::DatabaseType;
  using TransactionType = typename WorkloadType::TransactionType;
  using ContextType = typename DatabaseType::ContextType;
  using RandomType = typename DatabaseType::RandomType;
  using MessageType = typename ProtocolType::MessageType;
  using MessageFactoryType = typename ProtocolType::MessageFactoryType;
  using MessageHandlerType = typename ProtocolType::MessageHandlerType;

  using StorageType = typename WorkloadType::StorageType;

  // Forwards every argument unchanged to the base executor.
  SiloExecutor(std::size_t coordinator_id, std::size_t id, DatabaseType &db,
               const ContextType &context, std::atomic &worker_status,
               std::atomic &n_complete_workers,
               std::atomic &n_started_workers)
      : base_type(coordinator_id, id, db, context, worker_status,
                  n_complete_workers, n_started_workers) {}

  ~SiloExecutor() = default;

  /*
   * Install the callbacks a transaction uses while it executes:
   *
   *  - readRequestHandler: serves a read locally or queues a remote search.
   *  - remote_request_handler: forwards to process_request().
   *  - message_flusher: forwards to flush_messages().
   *  - get_table: forwards to db.find_table().
   *
   * Finally hands this executor's logger to the transaction.
   */
  void setupHandlers(TransactionType &txn) override {
    txn.readRequestHandler =
        [this, &txn](std::size_t table_id, std::size_t partition_id,
                     uint32_t key_offset, const void *key, void *value,
                     bool local_index_read) -> uint64_t {
      bool local_read = false;

      // A read is local when this node masters the partition, or when the
      // partition is replicated on this coordinator and reads on replicas
      // are enabled in the context.
      if (this->partitioner->has_master_partition(partition_id) ||
          (this->partitioner->is_partition_replicated_on(
               partition_id, this->coordinator_id) &&
           this->context.read_on_replica)) {
        local_read = true;
      }

      if (local_index_read || local_read) {
        // Served immediately; the result of protocol.search() is returned
        // as-is (presumably the record's tid - confirm against Silo::search).
        return this->protocol.search(table_id, partition_id, key, value);
      } else {
        // Remote read: append a search message to the buffer of the
        // partition's master coordinator, account for its size, and record
        // that a response is outstanding. 0 is returned as a placeholder
        // because no value is available yet.
        ITable *table = this->db.find_table(table_id, partition_id);
        auto coordinatorID =
            this->partitioner->master_coordinator(partition_id);
        txn.network_size += MessageFactoryType::new_search_message(
            *(this->messages[coordinatorID]), *table, key, key_offset);
        txn.pendingResponses++;
        txn.distributed_transaction = true;
        return 0;
      }
    };

    // The std::size_t argument is ignored by this executor.
    txn.remote_request_handler = [this](std::size_t) { return this->process_request(); };
    txn.message_flusher = [this]() { this->flush_messages(); };
    txn.get_table = [this](std::size_t tableId, std::size_t partitionId) { return this->db.find_table(tableId, partitionId); };
    txn.set_logger(this->logger);
  };
};
} // namespace star

--------------------------------------------------------------------------------
/protocol/SiloGC/SiloGCExecutor.h:
--------------------------------------------------------------------------------
1 | // 2 | // Created by Yi Lu on 9/11/18.
3 | // 4 | 5 | #pragma once 6 | 7 | #include "core/group_commit/Executor.h" 8 | #include "protocol/SiloGC/SiloGC.h" 9 | 10 | namespace star { 11 | template 12 | class SiloGCExecutor 13 | : public group_commit::Executor> 15 | 16 | { 17 | public: 18 | using base_type = 19 | group_commit::Executor>; 20 | 21 | using WorkloadType = Workload; 22 | using ProtocolType = SiloGC; 23 | using DatabaseType = typename WorkloadType::DatabaseType; 24 | using TransactionType = typename WorkloadType::TransactionType; 25 | using ContextType = typename DatabaseType::ContextType; 26 | using RandomType = typename DatabaseType::RandomType; 27 | using MessageType = typename ProtocolType::MessageType; 28 | using MessageFactoryType = typename ProtocolType::MessageFactoryType; 29 | using MessageHandlerType = typename ProtocolType::MessageHandlerType; 30 | 31 | using StorageType = typename WorkloadType::StorageType; 32 | 33 | SiloGCExecutor(std::size_t coordinator_id, std::size_t id, DatabaseType &db, 34 | const ContextType &context, 35 | std::atomic &worker_status, 36 | std::atomic &n_complete_workers, 37 | std::atomic &n_started_workers) 38 | : base_type(coordinator_id, id, db, context, worker_status, 39 | n_complete_workers, n_started_workers) {} 40 | 41 | ~ 42 | 43 | SiloGCExecutor() = default; 44 | 45 | void setupHandlers(TransactionType &txn) 46 | 47 | override { 48 | 49 | txn.readRequestHandler = 50 | [this, &txn](std::size_t table_id, std::size_t partition_id, 51 | uint32_t key_offset, const void *key, void *value, 52 | bool local_index_read) -> uint64_t { 53 | bool local_read = false; 54 | 55 | if (this->partitioner->has_master_partition(partition_id) || 56 | (this->partitioner->is_partition_replicated_on( 57 | partition_id, this->coordinator_id) && 58 | this->context.read_on_replica)) { 59 | local_read = true; 60 | } 61 | 62 | if (local_index_read || local_read) { 63 | return this->protocol.search(table_id, partition_id, key, value); 64 | } else { 65 | ITable *table = 
this->db.find_table(table_id, partition_id); 66 | auto coordinatorID = 67 | this->partitioner->master_coordinator(partition_id); 68 | txn.network_size += MessageFactoryType::new_search_message( 69 | *(this->sync_messages[coordinatorID]), *table, key, key_offset); 70 | txn.distributed_transaction = true; 71 | txn.pendingResponses++; 72 | return 0; 73 | } 74 | }; 75 | 76 | txn.remote_request_handler = [this](std::size_t) { return this->process_request(); }; 77 | txn.message_flusher = [this]() { this->flush_sync_messages(); }; 78 | txn.get_table = [this](std::size_t tableId, std::size_t partitionId) { return this->db.find_table(tableId, partitionId); }; 79 | }; 80 | }; 81 | } // namespace star -------------------------------------------------------------------------------- /common/StringPiece.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 8/28/18. 3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | /* 13 | * StringPiece is adapted from 14 | * https://github.com/protocolbuffers/protobuf/blob/master/src/google/protobuf/stubs/stringpiece.h 15 | */ 16 | 17 | namespace star { 18 | 19 | class StringPiece { 20 | 21 | public: 22 | using iterator = const char *; 23 | using size_type = std::size_t; 24 | 25 | StringPiece() : data_(nullptr), length_(0) {} 26 | 27 | StringPiece(const char *str) : data_(str), length_(0) { 28 | if (data_ != nullptr) { 29 | length_ = strlen(data_); 30 | } 31 | } 32 | 33 | StringPiece(const char *str, size_type length) 34 | : data_(str), length_(length) {} 35 | 36 | StringPiece(const std::string &str) 37 | : data_(str.data()), length_(str.length()) {} 38 | 39 | StringPiece(const StringPiece &that) 40 | : data_(that.data_), length_(that.length_) {} 41 | 42 | const char *data() const { return data_; } 43 | 44 | size_type size() const { return length_; } 45 | 46 | size_type length() const { return length_; } 47 | 48 | bool empty() const { return 
length_ == 0; } 49 | 50 | void clear() { 51 | data_ = nullptr; 52 | length_ = 0; 53 | } 54 | 55 | void set(const char *data, size_type length) { 56 | data_ = data; 57 | length_ = length; 58 | } 59 | 60 | void set(const char *data) { 61 | data_ = data; 62 | if (data_ == nullptr) { 63 | length_ = 0; 64 | } else { 65 | length_ = strlen(data); 66 | } 67 | } 68 | 69 | char operator[](size_type i) const { 70 | DCHECK(i < length_); 71 | return data_[i]; 72 | } 73 | 74 | void remove_prefix(size_type len) { 75 | DCHECK(len <= length_); 76 | data_ += len; 77 | length_ -= len; 78 | } 79 | 80 | void remove_suffix(size_type len) { 81 | DCHECK(len <= length_); 82 | length_ -= len; 83 | } 84 | 85 | int compare(const StringPiece &that) const { 86 | size_type minSize = length_ < that.length_ ? length_ : that.length_; 87 | int r = strncmp(data_, that.data_, minSize); 88 | if (r < 0) 89 | return -1; 90 | if (r > 0) 91 | return 1; 92 | if (length_ < that.length_) 93 | return -1; 94 | if (length_ > that.length_) 95 | return 1; 96 | return 0; 97 | } 98 | 99 | bool operator<(const StringPiece &that) const { return compare(that) < 0; } 100 | 101 | bool operator<=(const StringPiece &that) const { return compare(that) <= 0; } 102 | 103 | bool operator>(const StringPiece &that) const { return compare(that) > 0; } 104 | 105 | bool operator>=(const StringPiece &that) const { return compare(that) >= 0; } 106 | 107 | bool operator==(const StringPiece &that) const { return compare(that) == 0; } 108 | 109 | bool operator!=(const StringPiece &that) const { return compare(that) != 0; } 110 | 111 | std::string toString() const { 112 | if (data_ == nullptr) 113 | return std::string(); 114 | else 115 | return std::string(data_, length_); 116 | } 117 | 118 | iterator begin() const { return data_; } 119 | 120 | iterator end() const { return data_ + length_; } 121 | 122 | private: 123 | const char *data_; 124 | size_t length_; 125 | }; 126 | } // namespace star 
-------------------------------------------------------------------------------- /protocol/Aria/AriaRWKey.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 1/7/19. 3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace star { 10 | 11 | class AriaRWKey { 12 | public: 13 | // local index read bit 14 | 15 | void set_local_index_read_bit() { 16 | clear_local_index_read_bit(); 17 | bitvec |= LOCAL_INDEX_READ_BIT_MASK << LOCAL_INDEX_READ_BIT_OFFSET; 18 | } 19 | 20 | void clear_local_index_read_bit() { 21 | bitvec &= ~(LOCAL_INDEX_READ_BIT_MASK << LOCAL_INDEX_READ_BIT_OFFSET); 22 | } 23 | 24 | uint32_t get_local_index_read_bit() const { 25 | return (bitvec >> LOCAL_INDEX_READ_BIT_OFFSET) & LOCAL_INDEX_READ_BIT_MASK; 26 | } 27 | 28 | // read request bit 29 | 30 | void set_read_request_bit() { 31 | clear_read_request_bit(); 32 | bitvec |= READ_REQUEST_BIT_MASK << READ_REQUEST_BIT_OFFSET; 33 | } 34 | 35 | void clear_read_request_bit() { 36 | bitvec &= ~(READ_REQUEST_BIT_MASK << READ_REQUEST_BIT_OFFSET); 37 | } 38 | 39 | uint32_t get_read_request_bit() const { 40 | return (bitvec >> READ_REQUEST_BIT_OFFSET) & READ_REQUEST_BIT_MASK; 41 | } 42 | 43 | // table id 44 | 45 | void set_table_id(uint32_t table_id) { 46 | DCHECK(table_id < (1 << 5)); 47 | clear_table_id(); 48 | bitvec |= table_id << TABLE_ID_OFFSET; 49 | } 50 | 51 | void clear_table_id() { bitvec &= ~(TABLE_ID_MASK << TABLE_ID_OFFSET); } 52 | 53 | uint32_t get_table_id() const { 54 | return (bitvec >> TABLE_ID_OFFSET) & TABLE_ID_MASK; 55 | } 56 | // partition id 57 | 58 | void set_partition_id(uint32_t partition_id) { 59 | DCHECK(partition_id < (1 << 16)); 60 | clear_partition_id(); 61 | bitvec |= partition_id << PARTITION_ID_OFFSET; 62 | } 63 | 64 | void clear_partition_id() { 65 | bitvec &= ~(PARTITION_ID_MASK << PARTITION_ID_OFFSET); 66 | } 67 | 68 | uint32_t get_partition_id() const { 69 | return (bitvec >> PARTITION_ID_OFFSET) & 
PARTITION_ID_MASK; 70 | } 71 | 72 | // key 73 | void set_key(const void *key) { this->key = key; } 74 | 75 | const void *get_key() const { return key; } 76 | 77 | // value 78 | void set_value(void *value) { this->value = value; } 79 | 80 | void *get_value() const { return value; } 81 | 82 | void set_tid(std::atomic *tid) { this->tid = tid; }; 83 | 84 | std::atomic *get_tid() const { return tid; }; 85 | 86 | private: 87 | /* 88 | * A bitvec is a 32-bit word. 89 | * 90 | * [ table id (5) ] | partition id (16) | unused bit (9) | 91 | * read request bit (1) | local index read (1) ] 92 | * 93 | * local index read is set when the read is from a local read only index. 94 | */ 95 | 96 | uint32_t bitvec = 0; 97 | const void *key = nullptr; 98 | void *value = nullptr; 99 | std::atomic *tid = nullptr; 100 | 101 | public: 102 | static constexpr uint32_t TABLE_ID_MASK = 0x1f; 103 | static constexpr uint32_t TABLE_ID_OFFSET = 27; 104 | 105 | static constexpr uint32_t PARTITION_ID_MASK = 0xffff; 106 | static constexpr uint32_t PARTITION_ID_OFFSET = 11; 107 | 108 | static constexpr uint32_t READ_REQUEST_BIT_MASK = 0x1; 109 | static constexpr uint32_t READ_REQUEST_BIT_OFFSET = 1; 110 | 111 | static constexpr uint32_t LOCAL_INDEX_READ_BIT_MASK = 0x1; 112 | static constexpr uint32_t LOCAL_INDEX_READ_BIT_OFFSET = 0; 113 | }; 114 | } // namespace aria -------------------------------------------------------------------------------- /benchmark/ycsb/Schema.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 7/15/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include "common/ClassOf.h" 8 | #include "common/FixedString.h" 9 | #include "common/Hash.h" 10 | #include "common/Serialization.h" 11 | #include "core/SchemaDef.h" 12 | 13 | namespace star { 14 | namespace ycsb { 15 | static constexpr auto __BASE_COUNTER__ = __COUNTER__ + 1; 16 | static constexpr auto YCSB_FIELD_SIZE = 10; 17 | } // namespace ycsb 18 | } // namespace star 19 | 20 | #undef NAMESPACE_FIELDS 21 | #define NAMESPACE_FIELDS(x) x(star) x(ycsb) 22 | 23 | #define YCSB_KEY_FIELDS(x, y) x(int32_t, Y_KEY) 24 | #define YCSB_VALUE_FIELDS(x, y) \ 25 | x(FixedString, Y_F01) \ 26 | y(FixedString, Y_F02) \ 27 | y(FixedString, Y_F03) \ 28 | y(FixedString, Y_F04) \ 29 | y(FixedString, Y_F05) \ 30 | y(FixedString, Y_F06) \ 31 | y(FixedString, Y_F07) \ 32 | y(FixedString, Y_F08) \ 33 | y(FixedString, Y_F09) \ 34 | y(FixedString, Y_F10) 35 | 36 | DO_STRUCT(ycsb, YCSB_KEY_FIELDS, YCSB_VALUE_FIELDS, NAMESPACE_FIELDS) 37 | 38 | namespace star { 39 | 40 | template <> class Serializer { 41 | public: 42 | std::string operator()(const ycsb::ycsb::value &v) { 43 | return Serializer()(v.Y_F01) + 44 | Serializer()(v.Y_F02) + 45 | Serializer()(v.Y_F03) + 46 | Serializer()(v.Y_F04) + 47 | Serializer()(v.Y_F05) + 48 | Serializer()(v.Y_F06) + 49 | Serializer()(v.Y_F07) + 50 | Serializer()(v.Y_F08) + 51 | Serializer()(v.Y_F09) + 52 | Serializer()(v.Y_F10); 53 | } 54 | }; 55 | 56 | template <> class Deserializer { 57 | public: 58 | std::size_t operator()(StringPiece str, ycsb::ycsb::value &result) const { 59 | 60 | std::size_t sz = Deserializer()(str, result.Y_F01); 61 | str.remove_prefix(sz); 62 | Deserializer()(str, result.Y_F02); 63 | str.remove_prefix(sz); 64 | Deserializer()(str, result.Y_F03); 65 | str.remove_prefix(sz); 66 | Deserializer()(str, result.Y_F04); 67 | str.remove_prefix(sz); 68 | Deserializer()(str, result.Y_F05); 69 | str.remove_prefix(sz); 70 | Deserializer()(str, result.Y_F06); 71 | str.remove_prefix(sz); 72 | 
Deserializer()(str, result.Y_F07); 73 | str.remove_prefix(sz); 74 | Deserializer()(str, result.Y_F08); 75 | str.remove_prefix(sz); 76 | Deserializer()(str, result.Y_F09); 77 | str.remove_prefix(sz); 78 | Deserializer()(str, result.Y_F10); 79 | str.remove_prefix(sz); 80 | return sz * 10; 81 | } 82 | }; 83 | 84 | template <> class ClassOf { 85 | public: 86 | static constexpr std::size_t size() { 87 | return ClassOf::size() * 10; 88 | } 89 | }; 90 | 91 | } // namespace star -------------------------------------------------------------------------------- /README.MD: -------------------------------------------------------------------------------- 1 | **Xinjing Zhou**, Xiangyao Yu, Goetz Graefe, Michael Stonebraker 2 | 3 | [Lotus: Scalable Multi-Partition Transactions on Single-Threaded Partitioned Databases](https://doi.org/10.14778/3551793.3551843) 4 | 5 | *Proc. of the VLDB Endowment (PVLDB), Volume 15, Sydney, Australia, 2022.* 6 | 7 | This repository contains source code for Lotus. The code is based on the [star](https://github.com/luyi0619/star) framework from Yi Lu. 8 | 9 | # Dependencies 10 | 11 | ```sh 12 | sudo apt-get update 13 | sudo apt-get install -y zip make cmake g++ libjemalloc-dev libboost-dev libgoogle-glog-dev 14 | ``` 15 | 16 | # Download 17 | 18 | ```sh 19 | git clone https://github.com/DBOS-project/lotus.git 20 | ``` 21 | 22 | # Build 23 | 24 | ``` 25 | ./compile.sh 26 | ``` 27 | 28 | # Reproducing Experiments 29 | 30 | Note that the tutorial only works for Google Cloud Compute Engine. 31 | 32 | Make sure you have placed the source code folder under `~`. 33 | 34 | Make sure every node in the cluster has installed all the software dependencies. 35 | 36 | Make sure the benchmark has been compiled on every node using `compile.sh`. 37 | 38 | We assume the log files for transactions are placed under `/mnt/disks/nvme/`. 39 | 40 | The sample scripts provided run on 6 nodes. 41 | 42 | ## Figure 9(a): Comparison with Non-Deterministic Systems 43 | 44 | 1.
Fill in `scripts/ips.txt` with the IP addresses of the nodes you want to run the experiments on. 45 | 2. Fill in `scripts/instance_names.txt` with the corresponding instance names (Google Cloud Compute Engine instance names) of the nodes supplied in `scripts/ips.txt`. 46 | 3. Run the following bash code on the first node of the cluster to distribute the benchmarking scripts to the other nodes. 47 | ```bash 48 | cd scripts 49 | # port to run the experiments on 50 | # | 51 | python distribute_script.py 1234 gc_2pl_mp_ycsb.py us-central1-a # ----- Google Cloud zone name 52 | # | 53 | # baseline-specific distribution script 54 | python distribute_script.py 1234 gc_sundial_mp_ycsb.py us-central1-a 55 | python distribute_script.py 1234 gc_hstore_mp_ycsb.py us-central1-a 56 | python distribute_script.py 1234 gc_lotus_mp_ycsb.py us-central1-a 57 | ``` 58 | 4. Run the following code on the first node of the cluster to start the benchmark: 59 | ```bash 60 | sh run_2pl_mp_ycsb.sh 61 | sh run_sundial_mp_ycsb.sh 62 | sh run_hstore_mp_ycsb.sh 63 | sh run_lotus_mp_ycsb_sync.sh 64 | ``` 65 | 5. Results are placed under `~/exp_results` on the first node of the cluster. 66 | 67 | ## Figure 10(a): Comparison with Deterministic Systems 68 | 69 | 1. Fill in `scripts/ips.txt` with the IP addresses of the nodes you want to run the experiments on. 70 | 2. Fill in `scripts/instance_names.txt` with the corresponding instance names (Google Cloud Compute Engine instance names) of the nodes supplied in `scripts/ips.txt`. 71 | 3. Fill in `scripts/ips_half.txt` with the first half of the nodes from `scripts/ips.txt`. This is for the Calvin and Aria baselines, which only need to evaluate the performance of one replica (3 nodes). 72 | 4. Fill in `scripts/instance_names_half.txt` with the first half of the nodes from `scripts/instance_names.txt`. 73 | 5. Run the following bash code on the first node of the cluster to distribute the benchmarking scripts to the other nodes. 
74 | ```bash 75 | cd scripts 76 | python distribute_script_half.py 1234 gc_aria_mp_ycsb.py us-central1-a 77 | python distribute_script_half.py 1234 gc_calvin_mp_ycsb.py us-central1-a 78 | python distribute_script.py 1234 gc_lotus_mp_ycsb.py us-central1-a 79 | ``` 80 | 6. Run the following code on the first node of the cluster to start the benchmark 81 | ```bash 82 | sh run_aria_mp_ycsb.sh 83 | sh run_calvin_mp_ycsb.sh 84 | sh run_lotus_mp_ycsb.sh 85 | ``` 86 | 7. Results are placed under `~/exp_results` on the first node of the cluster. 87 |
--------------------------------------------------------------------------------
/common/FixedString.h:
--------------------------------------------------------------------------------
//
// Created by Yi Lu on 7/13/18.
//

#pragma once

#include
#include
#include

#include "ClassOf.h"
#include "Hash.h"
#include "Serialization.h"
#include "StringPiece.h"

namespace star {

/*
 * A string that always holds exactly N characters.
 *
 * Shorter inputs are right-padded with spaces. The backing array keeps one
 * extra trailing byte that is always '\0', so c_str() is a valid C string,
 * while size(), compare(), hash_code() and toString() only ever look at the
 * first N characters.
 */
template class FixedString {
public:
  static_assert(N > 0, "string length should be positive.");

  using size_type = std::size_t;

  // N spaces.
  FixedString() { assign(""); }

  // `str` must be a non-null, NUL-terminated string of at most N characters.
  FixedString(const char *str) { assign(std::string(str)); }

  FixedString(const std::string &str) { assign(str); }

  // Three-way comparison over the N stored characters: -1, 0 or 1.
  int compare(const FixedString &that) const {

    for (auto i = 0u; i < N; i++) {
      if (data_[i] < that.data_[i]) {
        return -1;
      }

      if (data_[i] > that.data_[i]) {
        return 1;
      }
    }
    return 0;
  }

  bool operator<(const FixedString &that) const { return compare(that) < 0; }

  bool operator<=(const FixedString &that) const { return compare(that) <= 0; }

  bool operator>(const FixedString &that) const { return compare(that) > 0; }

  bool operator>=(const FixedString &that) const { return compare(that) >= 0; }

  bool operator==(const FixedString &that) const { return compare(that) == 0; }

  bool operator!=(const FixedString &that) const { return compare(that) != 0; }

  // Replace the contents with all of `str` (which must fit in N characters).
  FixedString &assign(const std::string &str) {
    return assign(str, str.length());
  }

  /*
   * Replace the contents with the first `length` characters of `str`,
   * space-padding the remainder up to N and re-writing the terminator.
   * Requires length <= str.length() and length <= N (debug-checked).
   */
  FixedString &assign(const std::string &str, size_type length) {
    DCHECK(length <= str.length());
    DCHECK(length <= N);
    std::copy(str.begin(), str.begin() + length, data_.begin());
    DCHECK(data_.begin() + length <= data_.end() - 1);
    std::fill(data_.begin() + length, data_.end() - 1, ' ');
    data_[N] = 0;
    return *this;
  }

  // NUL-terminated view of the N stored characters. Declared const so it can
  // also be called on const objects; it never modified the string.
  const char *c_str() const { return &data_[0]; }

  // Combines the hash of each of the N stored characters.
  std::size_t hash_code() const {
    std::hash h;
    std::size_t hashCode = 0;
    for (auto i = 0u; i < N; i++) {
      hashCode = star::hash_combine(hashCode, h(data_[i]));
    }
    return hashCode;
  }

  constexpr size_type length() const { return N; }

  constexpr size_type size() const { return N; }

  // The N stored characters, without the trailing terminator.
  std::string toString() const {
    std::string str;
    // the last char is \0
    std::copy(data_.begin(), data_.end() - 1, std::back_inserter(str));
    DCHECK(str.length() == N);
    return str;
  }

private:
  std::array data_;
};

template
inline std::basic_ostream &operator<<(std::basic_ostream &os,
                                      const FixedString &str) {
  os << str.toString();
  return os;
}

// Serialized form: exactly the N stored characters, no terminator.
template class Serializer> {
public:
  std::string operator()(const FixedString &v) { return v.toString(); }
};

// Reads N characters from the front of `str`; returns the bytes consumed (N).
template class Deserializer> {
public:
  std::size_t operator()(StringPiece str, FixedString &result) const {
    // Build the temporary from an explicit (pointer, length) pair. Passing
    // the bare pointer would construct the std::string with strlen(), but the
    // serialized form carries no terminator, so that scan could run past the
    // buffer or stop early at a NUL byte inside the data.
    result.assign(std::string(str.data(), N), N);
    return N;
  }
};

template class ClassOf> {
public:
  static constexpr std::size_t size() { return N; }
};

} // namespace star

namespace std {
template struct hash> {
  std::size_t operator()(const star::FixedString &k) const {
    return k.hash_code();
  }
};
} // namespace std

--------------------------------------------------------------------------------
/protocol/TwoPL/TwoPLExecutor.h:
--------------------------------------------------------------------------------
1 | // 2 | // Created by Yi Lu on 9/11/18. 3 | // 4 | 5 | #pragma once 6 | 7 | #include "core/Executor.h" 8 | #include "protocol/TwoPL/TwoPL.h" 9 | 10 | namespace star { 11 | template 12 | class TwoPLExecutor 13 | : public Executor> 14 | 15 | { 16 | public: 17 | using base_type = Executor>; 18 | 19 | using WorkloadType = Workload; 20 | using ProtocolType = TwoPL; 21 | using DatabaseType = typename WorkloadType::DatabaseType; 22 | using TransactionType = typename WorkloadType::TransactionType; 23 | using ContextType = typename DatabaseType::ContextType; 24 | using RandomType = typename DatabaseType::RandomType; 25 | using MessageType = typename ProtocolType::MessageType; 26 | using MessageFactoryType = typename ProtocolType::MessageFactoryType; 27 | using MessageHandlerType = typename ProtocolType::MessageHandlerType; 28 | 29 | using StorageType = typename WorkloadType::StorageType; 30 | 31 | TwoPLExecutor(std::size_t coordinator_id, std::size_t id, DatabaseType &db, 32 | const ContextType &context, 33 | std::atomic &worker_status, 34 | std::atomic &n_complete_workers, 35 | std::atomic &n_started_workers) 36 | : base_type(coordinator_id, id, db, context, worker_status, 37 | n_complete_workers, n_started_workers) {} 38 | 39 | ~ 40 | 41 | TwoPLExecutor() = default; 42 | 43 | void setupHandlers(TransactionType &txn) 44 | 45 | override { 46 | txn.lock_request_handler = 47 | [this, &txn](std::size_t table_id, std::size_t partition_id, 48 | uint32_t key_offset, const void *key, void *value, 49 | bool local_index_read, bool write_lock, bool &success, 50 | bool &remote) -> uint64_t { 51 | if (local_index_read) { 52 | success = true; 53 | remote = false; 54 | return this->protocol.search(table_id,
partition_id, key, value); 55 | } 56 | 57 | ITable *table = this->db.find_table(table_id, partition_id); 58 | 59 | if (this->partitioner->has_master_partition(partition_id)) { 60 | 61 | remote = false; 62 | 63 | std::atomic &tid = table->search_metadata(key); 64 | 65 | if (write_lock) { 66 | TwoPLHelper::write_lock(tid, success); 67 | } else { 68 | TwoPLHelper::read_lock(tid, success); 69 | } 70 | 71 | if (success) { 72 | return this->protocol.search(table_id, partition_id, key, value); 73 | } else { 74 | return 0; 75 | } 76 | 77 | } else { 78 | 79 | remote = true; 80 | 81 | auto coordinatorID = 82 | this->partitioner->master_coordinator(partition_id); 83 | 84 | if (write_lock) { 85 | txn.network_size += MessageFactoryType::new_write_lock_message( 86 | *(this->messages[coordinatorID]), *table, key, key_offset); 87 | } else { 88 | txn.network_size += MessageFactoryType::new_read_lock_message( 89 | *(this->messages[coordinatorID]), *table, key, key_offset); 90 | } 91 | txn.distributed_transaction = true; 92 | return 0; 93 | } 94 | }; 95 | 96 | txn.remote_request_handler = [this](std::size_t) { return this->process_request(); }; 97 | txn.message_flusher = [this]() { this->flush_messages(); }; 98 | txn.get_table = [this](std::size_t tableId, std::size_t partitionId) { return this->db.find_table(tableId, partitionId); }; 99 | txn.set_logger(this->logger); 100 | }; 101 | }; 102 | } // namespace star 103 | -------------------------------------------------------------------------------- /bench_ycsb.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmark/ycsb/Database.h" 2 | #include "core/Coordinator.h" 3 | #include "core/Macros.h" 4 | #include "common/WALLogger.h" 5 | 6 | DEFINE_bool(lotus_sp_parallel_exec_commit, false, "parallel execution and commit for Lotus"); 7 | DEFINE_int32(read_write_ratio, 80, "read write ratio"); 8 | DEFINE_int32(read_only_ratio, 0, "read only transaction ratio"); 9 | 
DEFINE_int32(cross_ratio, 0, "cross partition transaction ratio"); 10 | DEFINE_int32(keys, 200000, "keys in a partition."); 11 | DEFINE_double(zipf, 0, "skew factor"); 12 | DEFINE_int32(cross_part_num, 2, "Cross-partition partion #"); 13 | 14 | DEFINE_int32(nop_prob, 0, "prob of transactions having nop, out of 10000"); 15 | DEFINE_int64(n_nop, 0, "total number of nop"); 16 | 17 | // ./main --logtostderr=1 --id=1 --servers="127.0.0.1:10010;127.0.0.1:10011" 18 | // cmake -DCMAKE_BUILD_TYPE=Release 19 | 20 | bool do_tid_check = false; 21 | 22 | int main(int argc, char *argv[]) { 23 | 24 | google::InitGoogleLogging(argv[0]); 25 | google::InstallFailureSignalHandler(); 26 | google::ParseCommandLineFlags(&argc, &argv, true); 27 | 28 | star::ycsb::Context context; 29 | SETUP_CONTEXT(context); 30 | 31 | context.readWriteRatio = FLAGS_read_write_ratio; 32 | context.readOnlyTransaction = FLAGS_read_only_ratio; 33 | context.crossPartitionProbability = FLAGS_cross_ratio; 34 | context.keysPerPartition = FLAGS_keys; 35 | context.lotus_sp_parallel_exec_commit = FLAGS_lotus_sp_parallel_exec_commit; 36 | context.crossPartitionPartNum = FLAGS_cross_part_num; 37 | context.nop_prob = FLAGS_nop_prob; 38 | context.n_nop = FLAGS_n_nop; 39 | 40 | context.granules_per_partition = FLAGS_granule_count; 41 | context.keysPerGranule = context.keysPerPartition / context.granules_per_partition; 42 | 43 | LOG(INFO) << "checkpoint " << context.lotus_checkpoint << " to " << context.lotus_checkpoint_location; 44 | LOG(INFO) << "cross_part_num " << FLAGS_cross_part_num; 45 | LOG(INFO) << "lotus_sp_parallel_exec_commit " << FLAGS_lotus_sp_parallel_exec_commit; 46 | LOG(INFO) << "granules_per_partition " << context.granules_per_partition; 47 | LOG(INFO) << "keysPerGranule " << context.keysPerGranule; 48 | 49 | star::ycsb::Context::unit_testing(&context); 50 | if (FLAGS_zipf > 0) { 51 | context.isUniform = false; 52 | star::Zipf::globalZipf().init(context.keysPerPartition * context.partition_num, 
FLAGS_zipf); 53 | } 54 | 55 | if (FLAGS_stragglers_zipf_factor > 0) { 56 | star::Zipf::globalZipfForStraggler().init(context.straggler_num_txn_len, FLAGS_stragglers_zipf_factor); 57 | } 58 | 59 | if (context.log_path != "" && context.wal_group_commit_time != 0) { 60 | std::string redo_filename = 61 | context.log_path + "_group_commit.txt"; 62 | std::string logger_type = "GroupCommit Logger"; 63 | if (context.lotus_checkpoint == LotusCheckpointScheme::COW_ON_CHECKPOINT_ON_LOGGING_OFF) { // logging off so that logging and checkpoint threads will not compete for bandwidth 64 | logger_type = "Blackhole Logger"; 65 | context.logger = new star::BlackholeLogger(redo_filename, context.emulated_persist_latency); 66 | } else { 67 | context.logger = new star::GroupCommitLogger(redo_filename, context.group_commit_batch_size, context.wal_group_commit_time, context.emulated_persist_latency); 68 | } 69 | LOG(INFO) << "WAL Group Commiting to file [" << redo_filename << "]" << " using " << logger_type; 70 | } else { 71 | std::string redo_filename = 72 | context.log_path + "_non_group_commit.txt"; 73 | context.logger = new star::SimpleWALLogger(redo_filename, context.emulated_persist_latency); 74 | LOG(INFO) << "WAL Group Commiting off"; 75 | } 76 | 77 | star::ycsb::Database db; 78 | db.initialize(context); 79 | 80 | do_tid_check = false; 81 | star::Coordinator c(FLAGS_id, db, context); 82 | c.connectToPeers(); 83 | c.start(); 84 | return 0; 85 | } -------------------------------------------------------------------------------- /benchmark/ycsb/Context.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 7/19/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include "core/Context.h" 8 | 9 | #include 10 | 11 | namespace star { 12 | namespace ycsb { 13 | 14 | enum class PartitionStrategy { RANGE, ROUND_ROBIN }; 15 | 16 | class Context : public star::Context { 17 | public: 18 | std::size_t getPartitionID(std::size_t key) const { 19 | DCHECK(key >= 0 && key < partition_num * keysPerPartition); 20 | 21 | if (strategy == PartitionStrategy::ROUND_ROBIN) { 22 | return key % partition_num; 23 | } else { 24 | return key / keysPerPartition; 25 | } 26 | } 27 | static bool tested; 28 | static void unit_testing(Context * ctx) { 29 | for (std::size_t i = 0; i < ctx->partition_num; ++i) { 30 | for (std::size_t k = 0; k < ctx->keysPerGranule; ++k) { 31 | auto complete_key = ctx->getGlobalKeyID(k, i); 32 | CHECK(ctx->getPartitionID(complete_key) == i); 33 | } 34 | } 35 | } 36 | 37 | std::size_t getGranule(std::size_t key) const { 38 | DCHECK(key >= 0 && key < partition_num * keysPerPartition); 39 | CHECK(granules_per_partition > 0); 40 | 41 | if (strategy == PartitionStrategy::ROUND_ROBIN) { 42 | auto partitionID = getPartitionID(key); 43 | return (key - partitionID) / partition_num % granules_per_partition; 44 | } else { 45 | return key % keysPerPartition % granules_per_partition; 46 | } 47 | } 48 | 49 | std::size_t getGlobalKeyID(std::size_t key, std::size_t partitionID, std::size_t granuleID) const { 50 | DCHECK(key >= 0 && key < keysPerGranule && partitionID >= 0 && 51 | partitionID < partition_num && granuleID >= 0 && granuleID < granules_per_partition); 52 | std::size_t ret_key; 53 | if (strategy == PartitionStrategy::ROUND_ROBIN) { 54 | ret_key = (key * granules_per_partition + granuleID) * partition_num + partitionID; 55 | } else { 56 | ret_key = partitionID * keysPerPartition + granuleID * keysPerGranule + key; 57 | } 58 | CHECK(ret_key >= 0 && ret_key < partition_num * keysPerPartition); 59 | return ret_key; 60 | } 61 | 62 | std::size_t getGlobalKeyID(std::size_t key, std::size_t 
partitionID) const { 63 | DCHECK(key >= 0 && key < keysPerPartition && partitionID >= 0 && 64 | partitionID < partition_num); 65 | std::size_t ret_key; 66 | if (strategy == PartitionStrategy::ROUND_ROBIN) { 67 | ret_key = key * partition_num + partitionID; 68 | } else { 69 | ret_key = partitionID * keysPerPartition + key; 70 | } 71 | CHECK(ret_key >= 0 && ret_key < partition_num * keysPerPartition); 72 | return ret_key; 73 | } 74 | 75 | Context get_single_partition_context() const { 76 | Context c = *this; 77 | c.crossPartitionProbability = 0; 78 | c.operation_replication = this->operation_replication; 79 | c.star_sync_in_single_master_phase = false; 80 | return c; 81 | } 82 | 83 | Context get_cross_partition_context() const { 84 | Context c = *this; 85 | c.crossPartitionProbability = 100; 86 | c.operation_replication = false; 87 | c.star_sync_in_single_master_phase = this->star_sync_in_single_master_phase; 88 | return c; 89 | } 90 | 91 | public: 92 | int readWriteRatio = 0; // out of 100 93 | int readOnlyTransaction = 0; // out of 100 94 | int crossPartitionProbability = 0; // out of 100 95 | int crossPartitionPartNum = 2; 96 | std::size_t keysPerTransaction = 10; 97 | std::size_t keysPerPartition = 200000; 98 | std::size_t keysPerGranule = 2000; 99 | 100 | std::size_t nop_prob = 0; // out of 10000 101 | std::size_t n_nop = 0; 102 | 103 | bool isUniform = true; 104 | 105 | PartitionStrategy strategy = PartitionStrategy::ROUND_ROBIN; 106 | }; 107 | bool Context::tested = false; 108 | } // namespace ycsb 109 | } // namespace star 110 | -------------------------------------------------------------------------------- /protocol/TwoPLGC/TwoPLGCExecutor.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 9/12/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include "core/group_commit/Executor.h" 8 | #include "protocol/TwoPLGC/TwoPLGC.h" 9 | 10 | namespace star { 11 | template 12 | class TwoPLGCExecutor 13 | : public group_commit::Executor> 15 | 16 | { 17 | public: 18 | using base_type = 19 | group_commit::Executor>; 21 | 22 | using WorkloadType = Workload; 23 | using ProtocolType = TwoPLGC; 24 | using DatabaseType = typename WorkloadType::DatabaseType; 25 | using TransactionType = typename WorkloadType::TransactionType; 26 | using ContextType = typename DatabaseType::ContextType; 27 | using RandomType = typename DatabaseType::RandomType; 28 | using MessageType = typename ProtocolType::MessageType; 29 | using MessageFactoryType = typename ProtocolType::MessageFactoryType; 30 | using MessageHandlerType = typename ProtocolType::MessageHandlerType; 31 | 32 | using StorageType = typename WorkloadType::StorageType; 33 | 34 | TwoPLGCExecutor(std::size_t coordinator_id, std::size_t id, DatabaseType &db, 35 | const ContextType &context, 36 | std::atomic &worker_status, 37 | std::atomic &n_complete_workers, 38 | std::atomic &n_started_workers) 39 | : base_type(coordinator_id, id, db, context, worker_status, 40 | n_complete_workers, n_started_workers) {} 41 | 42 | ~ 43 | 44 | TwoPLGCExecutor() = default; 45 | 46 | void setupHandlers(TransactionType &txn) 47 | 48 | override { 49 | txn.lock_request_handler = 50 | [this, &txn](std::size_t table_id, std::size_t partition_id, 51 | uint32_t key_offset, const void *key, void *value, 52 | bool local_index_read, bool write_lock, bool &success, 53 | bool &remote) -> uint64_t { 54 | if (local_index_read) { 55 | success = true; 56 | remote = false; 57 | return this->protocol.search(table_id, partition_id, key, value); 58 | } 59 | 60 | ITable *table = this->db.find_table(table_id, partition_id); 61 | 62 | if (this->partitioner->has_master_partition(partition_id)) { 63 | 64 | remote = false; 65 | 66 | std::atomic &tid = table->search_metadata(key); 67 | 68 
| if (write_lock) { 69 | TwoPLHelper::write_lock(tid, success); 70 | } else { 71 | TwoPLHelper::read_lock(tid, success); 72 | } 73 | 74 | if (success) { 75 | return this->protocol.search(table_id, partition_id, key, value); 76 | } else { 77 | return 0; 78 | } 79 | 80 | } else { 81 | 82 | remote = true; 83 | 84 | auto coordinatorID = 85 | this->partitioner->master_coordinator(partition_id); 86 | 87 | if (write_lock) { 88 | txn.network_size += MessageFactoryType::new_write_lock_message( 89 | *(this->sync_messages[coordinatorID]), *table, key, key_offset); 90 | } else { 91 | txn.network_size += MessageFactoryType::new_read_lock_message( 92 | *(this->sync_messages[coordinatorID]), *table, key, key_offset); 93 | } 94 | txn.distributed_transaction = true; 95 | return 0; 96 | } 97 | }; 98 | 99 | txn.remote_request_handler = [this](std::size_t) { return this->process_request(); }; 100 | txn.message_flusher = [this]() { this->flush_sync_messages(); }; 101 | txn.get_table = [this](std::size_t tableId, std::size_t partitionId) { return this->db.find_table(tableId, partitionId); }; 102 | }; 103 | }; 104 | } // namespace star 105 | -------------------------------------------------------------------------------- /common/MessagePiece.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 8/30/18. 3 | // 4 | 5 | #pragma once 6 | 7 | #include "common/StringPiece.h" 8 | 9 | namespace star { 10 | 11 | class Message; 12 | /* 13 | * MessagePiece header format 14 | * 15 | * | Message type (7 => 128) | Message length (12 => 4096) | table id (5 => 32) 16 | * | partition id (8 => 256) | 17 | * 18 | * Note that, the header is included in the message length. 
19 | */ 20 | 21 | class MessagePiece { 22 | 23 | public: 24 | using header_type = uint64_t; 25 | 26 | MessagePiece(){} 27 | 28 | MessagePiece(const MessagePiece &messagePiece) 29 | : stringPiece(messagePiece.stringPiece), message_ptr(messagePiece.message_ptr) {} 30 | 31 | MessagePiece(const StringPiece &stringPiece) : stringPiece(stringPiece), message_ptr(nullptr) {} 32 | 33 | uint32_t get_message_type() const { 34 | return (get_header() >> MESSAGE_TYPE_OFFSET) & MESSAGE_TYPE_MASK; 35 | } 36 | 37 | uint32_t get_message_length() const { 38 | return (get_header() >> MESSAGE_LENGTH_OFFSET) & MESSAGE_LENGTH_MASK; 39 | } 40 | 41 | uint32_t get_table_id() const { 42 | return (get_header() >> TABLE_ID_OFFSET) & TABLE_ID_MASK; 43 | } 44 | 45 | uint32_t get_partition_id() const { 46 | return (get_header() >> PARTITION_ID_OFFSET) & PARTITION_ID_MASK; 47 | } 48 | 49 | uint32_t get_granule_id() const { 50 | return (get_header() >> GRANULE_ID_OFFSET) & GRANULE_ID_MASK; 51 | } 52 | 53 | StringPiece toStringPiece() { 54 | return StringPiece(stringPiece.data() + get_header_size(), 55 | get_message_length() - get_header_size()); 56 | } 57 | 58 | bool operator==(const MessagePiece &that) const { 59 | return stringPiece == that.stringPiece; 60 | } 61 | 62 | bool operator!=(const MessagePiece &that) const { 63 | return stringPiece != that.stringPiece; 64 | } 65 | 66 | private: 67 | header_type get_header() const { 68 | return *reinterpret_cast(stringPiece.data()); 69 | } 70 | 71 | public: 72 | StringPiece stringPiece; 73 | Message* message_ptr = nullptr; 74 | public: 75 | static uint32_t get_header_size() { return sizeof(header_type); } 76 | 77 | static header_type construct_message_piece_header(uint32_t message_type, 78 | uint32_t message_length, 79 | std::size_t table_id, 80 | std::size_t partition_id, 81 | std::size_t granule_id = 0) { 82 | DCHECK(message_type < (1ull << 7)); 83 | DCHECK(message_length < (1ull << 22)); 84 | DCHECK(table_id < (1ull << 5)); 85 | DCHECK(granule_id < 
(1ull << 18)); 86 | DCHECK(partition_id < (1ull << 12)); 87 | 88 | return (((uint64_t)message_type) << MESSAGE_TYPE_OFFSET) + 89 | (((uint64_t)message_length) << MESSAGE_LENGTH_OFFSET) + 90 | (((uint64_t)table_id) << TABLE_ID_OFFSET) + 91 | (((uint64_t)granule_id) << GRANULE_ID_OFFSET) + 92 | (((uint64_t)partition_id) << PARTITION_ID_OFFSET); 93 | } 94 | 95 | static constexpr uint32_t get_message_length(header_type header) { 96 | return (header >> MESSAGE_LENGTH_OFFSET) & MESSAGE_LENGTH_MASK; 97 | } 98 | 99 | public: 100 | static constexpr uint64_t MESSAGE_TYPE_MASK = 0x7f; 101 | static constexpr uint64_t MESSAGE_TYPE_OFFSET = 30 + 5 + 20 + 2; 102 | static constexpr uint64_t MESSAGE_LENGTH_MASK = 0x3fffff; 103 | static constexpr uint64_t MESSAGE_LENGTH_OFFSET = 30 + 5; 104 | static constexpr uint64_t TABLE_ID_MASK = 0x1f; 105 | static constexpr uint64_t TABLE_ID_OFFSET = 30; 106 | static constexpr uint64_t GRANULE_ID_MASK = 0x3ffff; 107 | static constexpr uint64_t GRANULE_ID_OFFSET = 12; 108 | static constexpr uint64_t PARTITION_ID_MASK = 0xfff; 109 | static constexpr uint64_t PARTITION_ID_OFFSET = 0; 110 | }; 111 | } // namespace star 112 | -------------------------------------------------------------------------------- /protocol/Sundial/SundialExecutor.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Xinjing Zhou Lu on 04/26/22. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include "core/Executor.h" 8 | #include "protocol/Sundial/Sundial.h" 9 | 10 | namespace star { 11 | template 12 | class SundialExecutor 13 | : public Executor> 14 | 15 | { 16 | public: 17 | using base_type = Executor>; 18 | 19 | using WorkloadType = Workload; 20 | using ProtocolType = Sundial; 21 | using DatabaseType = typename WorkloadType::DatabaseType; 22 | using TransactionType = typename WorkloadType::TransactionType; 23 | using ContextType = typename DatabaseType::ContextType; 24 | using RandomType = typename DatabaseType::RandomType; 25 | using MessageType = typename ProtocolType::MessageType; 26 | using MessageFactoryType = typename ProtocolType::MessageFactoryType; 27 | using MessageHandlerType = typename ProtocolType::MessageHandlerType; 28 | 29 | using StorageType = typename WorkloadType::StorageType; 30 | 31 | SundialExecutor(std::size_t coordinator_id, std::size_t id, DatabaseType &db, 32 | const ContextType &context, std::atomic &worker_status, 33 | std::atomic &n_complete_workers, 34 | std::atomic &n_started_workers) 35 | : base_type(coordinator_id, id, db, context, worker_status, 36 | n_complete_workers, n_started_workers) {} 37 | 38 | ~SundialExecutor() = default; 39 | 40 | void setupHandlers(TransactionType &txn) 41 | 42 | override { 43 | txn.readRequestHandler = 44 | [this, &txn](std::size_t table_id, std::size_t partition_id, 45 | uint32_t key_offset, const void *key, void *value, 46 | bool local_index_read, bool write_lock) { 47 | bool local_read = false; 48 | 49 | if (this->partitioner->has_master_partition(partition_id) || 50 | (this->partitioner->is_partition_replicated_on( 51 | partition_id, this->coordinator_id) && 52 | this->context.read_on_replica)) { 53 | local_read = true; 54 | } 55 | 56 | if (local_index_read || local_read) { 57 | ITable *table = this->db.find_table(table_id, partition_id); 58 | auto value_size = table->value_size(); 59 | auto row = table->search(key); 60 | bool success = true; 61 | 
62 | std::pair rwts; 63 | if (write_lock) { 64 | DCHECK(local_index_read == false); 65 | success = SundialHelper::write_lock(row, rwts, txn.transaction_id); 66 | } 67 | auto read_rwts = SundialHelper::read(row, value, value_size); 68 | txn.readSet[key_offset].set_wts(read_rwts.first); 69 | txn.readSet[key_offset].set_rts(read_rwts.second); 70 | if (write_lock) { 71 | DCHECK(local_index_read == false); 72 | if (success) { 73 | DCHECK(rwts == read_rwts); 74 | txn.readSet[key_offset].set_write_lock_bit(); 75 | } else { 76 | txn.abort_lock = true; 77 | } 78 | } 79 | return; 80 | } else { 81 | ITable *table = this->db.find_table(table_id, partition_id); 82 | auto coordinatorID = 83 | this->partitioner->master_coordinator(partition_id); 84 | txn.network_size += MessageFactoryType::new_read_message( 85 | *(this->messages[coordinatorID]), *table, key, txn.transaction_id, write_lock, key_offset); 86 | txn.pendingResponses++; 87 | txn.distributed_transaction = true; 88 | return; 89 | } 90 | }; 91 | 92 | txn.remote_request_handler = [this](std::size_t) { return this->process_request(); }; 93 | txn.message_flusher = [this]() { this->flush_messages(); }; 94 | txn.get_table = [this](std::size_t tableId, std::size_t partitionId) { return this->db.find_table(tableId, partitionId); }; 95 | txn.set_logger(this->logger); 96 | }; 97 | }; 98 | } // namespace star 99 | -------------------------------------------------------------------------------- /protocol/Silo/SiloRWKey.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 9/11/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | namespace star { 14 | 15 | class SiloRWKey { 16 | public: 17 | // local index read bit 18 | 19 | void set_local_index_read_bit() { 20 | clear_local_index_read_bit(); 21 | bitvec |= LOCAL_INDEX_READ_BIT_MASK << LOCAL_INDEX_READ_BIT_OFFSET; 22 | } 23 | 24 | void clear_local_index_read_bit() { 25 | bitvec &= ~(LOCAL_INDEX_READ_BIT_MASK << LOCAL_INDEX_READ_BIT_OFFSET); 26 | } 27 | 28 | uint64_t get_local_index_read_bit() const { 29 | return (bitvec >> LOCAL_INDEX_READ_BIT_OFFSET) & LOCAL_INDEX_READ_BIT_MASK; 30 | } 31 | 32 | // read request bit 33 | 34 | void set_read_request_bit() { 35 | clear_read_request_bit(); 36 | bitvec |= READ_REQUEST_BIT_MASK << READ_REQUEST_BIT_OFFSET; 37 | } 38 | 39 | void clear_read_request_bit() { 40 | bitvec &= ~(READ_REQUEST_BIT_MASK << READ_REQUEST_BIT_OFFSET); 41 | } 42 | 43 | uint64_t get_read_request_bit() const { 44 | return (bitvec >> READ_REQUEST_BIT_OFFSET) & READ_REQUEST_BIT_MASK; 45 | } 46 | 47 | // write lock bit 48 | void set_write_lock_bit() { 49 | clear_write_lock_bit(); 50 | bitvec |= WRITE_LOCK_BIT_MASK << WRITE_LOCK_BIT_OFFSET; 51 | } 52 | 53 | void clear_write_lock_bit() { 54 | bitvec &= ~(WRITE_LOCK_BIT_MASK << WRITE_LOCK_BIT_OFFSET); 55 | } 56 | 57 | bool get_write_lock_bit() const { 58 | return (bitvec >> WRITE_LOCK_BIT_OFFSET) & WRITE_LOCK_BIT_MASK; 59 | } 60 | 61 | // table id 62 | 63 | void set_table_id(uint64_t table_id) { 64 | DCHECK(table_id < (1 << 5)); 65 | clear_table_id(); 66 | bitvec |= table_id << TABLE_ID_OFFSET; 67 | } 68 | 69 | void clear_table_id() { bitvec &= ~(TABLE_ID_MASK << TABLE_ID_OFFSET); } 70 | 71 | uint64_t get_table_id() const { 72 | return (bitvec >> TABLE_ID_OFFSET) & TABLE_ID_MASK; 73 | } 74 | // partition id 75 | 76 | void set_partition_id(uint64_t partition_id) { 77 | DCHECK(partition_id < (1ULL << 32)); 78 | clear_partition_id(); 79 | bitvec |= partition_id << PARTITION_ID_OFFSET; 80 | } 
81 | 82 | void clear_partition_id() { 83 | bitvec &= ~(PARTITION_ID_MASK << PARTITION_ID_OFFSET); 84 | } 85 | 86 | uint64_t get_partition_id() const { 87 | return (bitvec >> PARTITION_ID_OFFSET) & PARTITION_ID_MASK; 88 | } 89 | 90 | // tid 91 | uint64_t get_tid() const { return tid; } 92 | 93 | void set_tid(uint64_t tid) { this->tid = tid; } 94 | 95 | // key 96 | void set_key(const void *key) { this->key = key; } 97 | 98 | const void *get_key() const { return key; } 99 | 100 | // value 101 | void set_value(void *value) { this->value = value; } 102 | 103 | void *get_value() const { return value; } 104 | 105 | private: 106 | /* 107 | * A bitvec is a 32-bit word. 108 | * 109 | * [ table id (5) ] | partition id (8) | unused bit (16) | 110 | * write lock bit(1) | read request bit (1) | local index read (1) ] 111 | * 112 | * write lock bit is set when a write lock is acquired. 113 | * read request bit is set when the read response is received. 114 | * local index read is set when the read is from a local read only index. 
115 | * 116 | */ 117 | 118 | uint64_t bitvec = 0; 119 | uint64_t tid = 0; 120 | const void *key = nullptr; 121 | void *value = nullptr; 122 | 123 | public: 124 | static constexpr uint64_t TABLE_ID_MASK = 0x1f; 125 | static constexpr uint64_t TABLE_ID_OFFSET = 27+24; 126 | 127 | static constexpr uint64_t PARTITION_ID_MASK = 0xffffffff; 128 | static constexpr uint64_t PARTITION_ID_OFFSET = 19; 129 | 130 | static constexpr uint64_t WRITE_LOCK_BIT_MASK = 0x1; 131 | static constexpr uint64_t WRITE_LOCK_BIT_OFFSET = 2; 132 | 133 | static constexpr uint64_t READ_REQUEST_BIT_MASK = 0x1; 134 | static constexpr uint64_t READ_REQUEST_BIT_OFFSET = 1; 135 | 136 | static constexpr uint64_t LOCAL_INDEX_READ_BIT_MASK = 0x1; 137 | static constexpr uint64_t LOCAL_INDEX_READ_BIT_OFFSET = 0; 138 | }; 139 | } // namespace star 140 | -------------------------------------------------------------------------------- /common/BufferedReader.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 8/30/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include "common/Message.h" 8 | #include "common/Socket.h" 9 | 10 | #include 11 | 12 | namespace star { 13 | class BufferedReader { 14 | public: 15 | BufferedReader(Socket &socket) 16 | : socket(&socket), bytes_read(0), bytes_total(0) {} 17 | 18 | // BufferedReader is not copyable 19 | BufferedReader(const BufferedReader &) = delete; 20 | 21 | BufferedReader &operator=(const BufferedReader &) = delete; 22 | 23 | // BufferedReader is movable 24 | 25 | BufferedReader(BufferedReader &&that) 26 | : socket(that.socket), bytes_read(that.bytes_read), 27 | bytes_total(that.bytes_total) { 28 | that.socket = nullptr; 29 | that.bytes_read = 0; 30 | that.bytes_total = 0; 31 | } 32 | 33 | BufferedReader &operator=(BufferedReader &&that) { 34 | socket = that.socket; 35 | bytes_read = that.bytes_read; 36 | bytes_total = that.bytes_total; 37 | 38 | that.socket = nullptr; 39 | that.bytes_read = 0; 40 | that.bytes_total = 0; 41 | return *this; 42 | } 43 | 44 | std::unique_ptr next_message() { 45 | DCHECK(socket != nullptr); 46 | 47 | fetch_message(); 48 | if (!has_message()) { 49 | return nullptr; 50 | } 51 | 52 | // read header and deadbeef; 53 | auto header = 54 | *reinterpret_cast(buffer + bytes_read); 55 | auto deadbeef = *reinterpret_cast( 56 | buffer + bytes_read + sizeof(header)); 57 | 58 | // check deadbeaf 59 | DCHECK(deadbeef == Message::DEADBEEF); 60 | auto message = std::make_unique(); 61 | auto length = Message::get_message_length(header); 62 | message->resize(length); 63 | 64 | // copy the data 65 | DCHECK(bytes_read + length <= bytes_total); 66 | std::memcpy(message->get_raw_ptr(), buffer + bytes_read, length); 67 | bytes_read += length; 68 | DCHECK(bytes_read <= bytes_total); 69 | 70 | return message; 71 | } 72 | 73 | std::size_t get_read_call_cnt() { 74 | return read_calls; 75 | } 76 | private: 77 | void fetch_message() { 78 | DCHECK(socket != nullptr); 79 | 80 | // return if there is a message left 81 | if (has_message()) { 82 | 
return; 83 | } 84 | 85 | // copy left bytes 86 | DCHECK(bytes_read <= bytes_total); 87 | auto bytes_left = bytes_total - bytes_read; 88 | bytes_total = 0; 89 | 90 | if (bytes_left > 0 && bytes_read > 0) { 91 | 92 | if (bytes_left <= bytes_read) { // non overlapping 93 | std::memcpy(buffer, buffer + bytes_read, bytes_left); 94 | } else { 95 | for (auto i = 0u; i < bytes_left; i++) { 96 | buffer[i] = buffer[i + bytes_read]; 97 | } 98 | } 99 | } 100 | bytes_total += bytes_left; 101 | bytes_read = 0; 102 | 103 | // read new message 104 | 105 | auto bytes_received = 106 | socket->read_async(buffer + bytes_total, BUFFER_SIZE - bytes_total); 107 | read_calls++; 108 | if (bytes_received > 0) { 109 | // successful read 110 | bytes_total += bytes_received; 111 | } 112 | } 113 | 114 | bool has_message() { 115 | // check if the buffer has a message header 116 | if (bytes_read + Message::get_prefix_size() > bytes_total) { 117 | return false; 118 | } 119 | 120 | // read header and deadbeef; 121 | auto header = 122 | *reinterpret_cast(buffer + bytes_read); 123 | auto deadbeef = *reinterpret_cast( 124 | buffer + bytes_read + sizeof(header)); 125 | 126 | // check deadbeaf 127 | DCHECK(deadbeef == Message::DEADBEEF); 128 | 129 | // check if the buffer has a message 130 | return bytes_read + Message::get_message_length(header) <= bytes_total; 131 | } 132 | 133 | public: 134 | static constexpr uint32_t BUFFER_SIZE = 1024 * 1024 * 4; // 4MB 135 | 136 | private: 137 | Socket *socket; 138 | char buffer[BUFFER_SIZE]; 139 | std::size_t bytes_read, bytes_total; 140 | std::size_t read_calls = 0; 141 | }; 142 | } // namespace star 143 | -------------------------------------------------------------------------------- /core/SchemaDef.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 7/15/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | // macros for code generation 8 | 9 | #define APPLY_X_AND_Y(x, y) x(y, y) 10 | 11 | #define NAMESPACE_OPEN(name) namespace name { 12 | 13 | #define NAMESPACE_CLOSE(name) } 14 | 15 | #define NAMESPACE_EXPAND(name) name:: 16 | 17 | #define STRUCT_PARAM_FIRST_X(type, name) type name 18 | 19 | #define STRUCT_PARAM_REST_X(type, name) , type name 20 | 21 | #define STRUCT_INITLIST_FIRST_X(type, name) name(name) 22 | 23 | #define STRUCT_INITLIST_REST_X(type, name) , name(name) 24 | 25 | #define STRUCT_HASH_FIRST_X(type, name) k.name 26 | 27 | #define STRUCT_HASH_REST_X(type, name) , k.name 28 | 29 | #define STRUCT_LAYOUT_X(type, name) type name; 30 | 31 | #define STRUCT_EQ_X(type, name) \ 32 | if (this->name != other.name) \ 33 | return false; 34 | 35 | #define STRUCT_FIELDPOS_X(type, name) name##_field, 36 | 37 | // the main macro 38 | #define DO_STRUCT(name, keyfields, valuefields, namespacefields) \ 39 | namespacefields(NAMESPACE_OPEN) struct name { \ 40 | struct key { \ 41 | key() = default; \ 42 | key(keyfields(STRUCT_PARAM_FIRST_X, STRUCT_PARAM_REST_X)) \ 43 | : keyfields(STRUCT_INITLIST_FIRST_X, STRUCT_INITLIST_REST_X) {} \ 44 | APPLY_X_AND_Y(keyfields, STRUCT_LAYOUT_X) \ 45 | bool operator==(const struct key &other) const { \ 46 | APPLY_X_AND_Y(keyfields, STRUCT_EQ_X) \ 47 | return true; \ 48 | } \ 49 | bool operator!=(const struct key &other) const { \ 50 | return !operator==(other); \ 51 | } \ 52 | enum { APPLY_X_AND_Y(keyfields, STRUCT_FIELDPOS_X) NFIELDS }; \ 53 | }; \ 54 | struct value { \ 55 | value() = default; \ 56 | value(valuefields(STRUCT_PARAM_FIRST_X, STRUCT_PARAM_REST_X)) \ 57 | : valuefields(STRUCT_INITLIST_FIRST_X, STRUCT_INITLIST_REST_X) {} \ 58 | APPLY_X_AND_Y(valuefields, STRUCT_LAYOUT_X) \ 59 | bool operator==(const struct value &other) const { \ 60 | APPLY_X_AND_Y(valuefields, STRUCT_EQ_X) \ 61 | return true; \ 62 | } \ 63 | bool operator!=(const struct value &other) const { \ 64 | return !operator==(other); 
\ 65 | } \ 66 | enum { APPLY_X_AND_Y(valuefields, STRUCT_FIELDPOS_X) NFIELDS }; \ 67 | }; \ 68 | static constexpr std::size_t tableID = __COUNTER__ - __BASE_COUNTER__; \ 69 | }; \ 70 | namespacefields(NAMESPACE_CLOSE) namespace std { \ 71 | template <> struct hash { \ 72 | std::size_t operator()(const namespacefields(NAMESPACE_EXPAND) \ 73 | name::key &k) const { \ 74 | return star::hash(keyfields(STRUCT_HASH_FIRST_X, STRUCT_HASH_REST_X)); \ 75 | } \ 76 | }; \ 77 | } 78 | -------------------------------------------------------------------------------- /protocol/Aria/AriaHelper.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 1/7/19. 3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "core/Table.h" 12 | 13 | #include "glog/logging.h" 14 | #include "protocol/Aria/AriaRWKey.h" 15 | 16 | namespace star { 17 | 18 | class AriaHelper { 19 | 20 | public: 21 | using MetaDataType = std::atomic; 22 | 23 | static uint64_t read(const std::tuple &row, 24 | void *dest, std::size_t size) { 25 | MetaDataType &tid = *std::get<0>(row); 26 | void *src = std::get<1>(row); 27 | std::memcpy(dest, src, size); 28 | return tid.load(); 29 | } 30 | 31 | static void 32 | set_key_tid(AriaRWKey &key, 33 | const std::tuple *, void *> &row) { 34 | key.set_tid(std::get<0>(row)); 35 | } 36 | 37 | static std::atomic &get_metadata(ITable *table, 38 | const AriaRWKey &key) { 39 | auto tid = key.get_tid(); 40 | if (!tid) { 41 | tid = &table->search_metadata(key.get_key()); 42 | } 43 | return *tid; 44 | } 45 | 46 | static bool reserve_read(std::atomic &a, uint64_t epoch, 47 | uint32_t tid) { 48 | uint64_t old_value, new_value; 49 | do { 50 | old_value = a.load(); 51 | uint64_t old_epoch = get_epoch(old_value); 52 | uint64_t old_rts = get_rts(old_value); 53 | 54 | CHECK(epoch >= old_epoch); 55 | if (epoch > old_epoch) { 56 | new_value = set_epoch(0, epoch); 57 | new_value = set_rts(new_value, tid); 
58 | } else { 59 | 60 | if (old_rts < tid && old_rts != 0) { 61 | return false; 62 | } 63 | // keep wts 64 | new_value = old_value; 65 | new_value = set_rts(new_value, tid); 66 | } 67 | } while (!a.compare_exchange_weak(old_value, new_value)); 68 | return true; 69 | } 70 | 71 | static bool reserve_write(std::atomic &a, uint64_t epoch, 72 | uint32_t tid) { 73 | uint64_t old_value, new_value; 74 | do { 75 | old_value = a.load(); 76 | uint64_t old_epoch = get_epoch(old_value); 77 | uint64_t old_wts = get_wts(old_value); 78 | 79 | CHECK(epoch >= old_epoch); 80 | if (epoch > old_epoch) { 81 | new_value = set_epoch(0, epoch); 82 | new_value = set_wts(new_value, tid); 83 | } else { 84 | 85 | if (old_wts < tid && old_wts != 0) { 86 | return false; 87 | } 88 | // keep rts 89 | new_value = old_value; 90 | new_value = set_wts(new_value, tid); 91 | } 92 | } while (!a.compare_exchange_weak(old_value, new_value)); 93 | return true; 94 | } 95 | 96 | static uint64_t get_epoch(uint64_t value) { 97 | return (value >> EPOCH_OFFSET) & EPOCH_MASK; 98 | } 99 | 100 | static uint64_t set_epoch(uint64_t value, uint64_t epoch) { 101 | DCHECK(epoch < (1ull << 24)); 102 | return (value & (~(EPOCH_MASK << EPOCH_OFFSET))) | (epoch << EPOCH_OFFSET); 103 | } 104 | 105 | static uint64_t get_rts(uint64_t value) { 106 | return (value >> RTS_OFFSET) & RTS_MASK; 107 | } 108 | 109 | static uint64_t set_rts(uint64_t value, uint64_t rts) { 110 | DCHECK(rts < (1ull << 20)); 111 | return (value & (~(RTS_MASK << RTS_OFFSET))) | (rts << RTS_OFFSET); 112 | } 113 | 114 | static uint64_t get_wts(uint64_t value) { 115 | return (value >> WTS_OFFSET) & WTS_MASK; 116 | } 117 | 118 | static uint64_t set_wts(uint64_t value, uint64_t wts) { 119 | DCHECK(wts < (1ull << 20)); 120 | return (value & (~(WTS_MASK << WTS_OFFSET))) | (wts << WTS_OFFSET); 121 | } 122 | 123 | public: 124 | /* 125 | * [epoch (24) | read-rts (20) | write-wts (20)] 126 | * 127 | */ 128 | 129 | static constexpr int EPOCH_OFFSET = 40; 130 | static 
constexpr uint64_t EPOCH_MASK = 0xffffffull; 131 | 132 | static constexpr int RTS_OFFSET = 20; 133 | static constexpr uint64_t RTS_MASK = 0xfffffull; 134 | 135 | static constexpr int WTS_OFFSET = 0; 136 | static constexpr uint64_t WTS_MASK = 0xfffffull; 137 | }; 138 | 139 | } // namespace aria -------------------------------------------------------------------------------- /protocol/H-Store/HStoreHelper.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Xinjing on 9/12/21. 3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace star { 13 | 14 | class HStoreHelper { 15 | public: 16 | using MetaDataType = std::atomic; 17 | 18 | static void read(const std::tuple &row, 19 | void *dest, std::size_t size) { 20 | void *src = std::get<1>(row); 21 | std::memcpy(dest, src, size); 22 | return; 23 | } 24 | /** 25 | * [write lock bit (1) | read lock bit (9) -- 512 - 1 locks | seq id (54) ] 26 | * 27 | */ 28 | 29 | static bool is_read_locked(uint64_t value) { 30 | return value & (READ_LOCK_BIT_MASK << READ_LOCK_BIT_OFFSET); 31 | } 32 | 33 | static bool is_write_locked(uint64_t value) { 34 | return value & (WRITE_LOCK_BIT_MASK << WRITE_LOCK_BIT_OFFSET); 35 | } 36 | 37 | static uint64_t read_lock_num(uint64_t value) { 38 | return (value >> READ_LOCK_BIT_OFFSET) & READ_LOCK_BIT_MASK; 39 | } 40 | 41 | static uint64_t read_lock_max() { return READ_LOCK_BIT_MASK; } 42 | 43 | static uint64_t read_lock(std::atomic &a, bool &success) { 44 | uint64_t old_value, new_value; 45 | do { 46 | old_value = a.load(); 47 | if (is_write_locked(old_value) || 48 | read_lock_num(old_value) == read_lock_max()) { 49 | success = false; 50 | return remove_lock_bit(old_value); 51 | } 52 | new_value = old_value + (1ull << READ_LOCK_BIT_OFFSET); 53 | } while (!a.compare_exchange_weak(old_value, new_value)); 54 | success = true; 55 | return remove_lock_bit(old_value); 56 | } 57 | 58 | static uint64_t 
write_lock(std::atomic &a, bool &success) { 59 | uint64_t old_value = a.load(); 60 | if (is_read_locked(old_value) || is_write_locked(old_value)) { 61 | success = false; 62 | return remove_lock_bit(old_value); 63 | } 64 | uint64_t new_value = 65 | old_value + (WRITE_LOCK_BIT_MASK << WRITE_LOCK_BIT_OFFSET); 66 | success = a.compare_exchange_strong(old_value, new_value); 67 | return remove_lock_bit(old_value); 68 | } 69 | 70 | static uint64_t write_lock(std::atomic &a) { 71 | uint64_t old_value, new_value; 72 | 73 | do { 74 | do { 75 | old_value = a.load(); 76 | } while (is_read_locked(old_value) || is_write_locked(old_value)); 77 | 78 | new_value = old_value + (WRITE_LOCK_BIT_MASK << WRITE_LOCK_BIT_OFFSET); 79 | 80 | } while (!a.compare_exchange_weak(old_value, new_value)); 81 | return remove_lock_bit(old_value); 82 | } 83 | 84 | static void read_lock_release(std::atomic &a) { 85 | uint64_t old_value, new_value; 86 | do { 87 | old_value = a.load(); 88 | DCHECK(is_read_locked(old_value)); 89 | DCHECK(!is_write_locked(old_value)); 90 | new_value = old_value - (1ull << READ_LOCK_BIT_OFFSET); 91 | } while (!a.compare_exchange_weak(old_value, new_value)); 92 | } 93 | 94 | static void write_lock_release(std::atomic &a) { 95 | uint64_t old_value, new_value; 96 | old_value = a.load(); 97 | DCHECK(!is_read_locked(old_value)); 98 | DCHECK(is_write_locked(old_value)); 99 | new_value = old_value - (1ull << WRITE_LOCK_BIT_OFFSET); 100 | bool ok = a.compare_exchange_strong(old_value, new_value); 101 | DCHECK(ok); 102 | } 103 | 104 | static void write_lock_release(std::atomic &a, uint64_t new_value) { 105 | uint64_t old_value; 106 | old_value = a.load(); 107 | DCHECK(!is_read_locked(old_value)); 108 | DCHECK(is_write_locked(old_value)); 109 | DCHECK(!is_read_locked(new_value)); 110 | DCHECK(!is_write_locked(new_value)); 111 | bool ok = a.compare_exchange_weak(old_value, new_value); 112 | DCHECK(ok); 113 | } 114 | 115 | static uint64_t remove_lock_bit(uint64_t value) { 116 | return 
/**
 * Lock primitives for two-phase locking, operating on one 64-bit metadata
 * word per row:
 *
 *   [ write lock bit (1) | read lock count (9) -- at most 511 | seq id (54) ]
 *
 * Every state change is an atomic load / compare-exchange on that word.
 */
class TwoPLHelper {
public:
  using MetaDataType = std::atomic<uint64_t>;

  /**
   * Copy `size` bytes of the row's value into `dest`.
   *
   * @param row   (metadata word, value buffer) pair describing the row.
   * @param dest  destination buffer; must hold at least `size` bytes.
   * @param size  number of bytes to copy.
   * @return the metadata word, loaded after the copy, with lock bits cleared.
   */
  static uint64_t read(const std::tuple<MetaDataType *, void *> &row,
                       void *dest, std::size_t size) {

    MetaDataType &tid = *std::get<0>(row);
    void *src = std::get<1>(row);
    std::memcpy(dest, src, size);
    uint64_t tid_ = tid.load();
    return remove_lock_bit(tid_);
  }

  /** True when at least one read lock is recorded in `value`. */
  static bool is_read_locked(uint64_t value) {
    return value & (READ_LOCK_BIT_MASK << READ_LOCK_BIT_OFFSET);
  }

  /** True when the write lock bit is set in `value`. */
  static bool is_write_locked(uint64_t value) {
    return value & (WRITE_LOCK_BIT_MASK << WRITE_LOCK_BIT_OFFSET);
  }

  /** Number of read locks recorded in `value`. */
  static uint64_t read_lock_num(uint64_t value) {
    return (value >> READ_LOCK_BIT_OFFSET) & READ_LOCK_BIT_MASK;
  }

  /** Largest read lock count the 9-bit counter can hold. */
  static uint64_t read_lock_max() { return READ_LOCK_BIT_MASK; }

  /**
   * Try to take a read lock without waiting for a writer.
   *
   * Fails (success = false) when the word is write locked or the reader
   * counter is already at its maximum; otherwise retries the CAS until the
   * counter has been incremented.
   *
   * @return the observed metadata word with lock bits cleared.
   */
  static uint64_t read_lock(std::atomic<uint64_t> &a, bool &success) {
    uint64_t old_value, new_value;
    do {
      old_value = a.load();
      if (is_write_locked(old_value) ||
          read_lock_num(old_value) == read_lock_max()) {
        success = false;
        return remove_lock_bit(old_value);
      }
      new_value = old_value + (1ull << READ_LOCK_BIT_OFFSET);
    } while (!a.compare_exchange_weak(old_value, new_value));
    success = true;
    return remove_lock_bit(old_value);
  }

  /**
   * Try once to take the write lock.
   *
   * Fails when any read or write lock is held, or when the single CAS loses
   * a race with another update.
   *
   * @return the observed metadata word with lock bits cleared.
   */
  static uint64_t write_lock(std::atomic<uint64_t> &a, bool &success) {
    uint64_t old_value = a.load();
    if (is_read_locked(old_value) || is_write_locked(old_value)) {
      success = false;
      return remove_lock_bit(old_value);
    }
    uint64_t new_value =
        old_value + (WRITE_LOCK_BIT_MASK << WRITE_LOCK_BIT_OFFSET);
    success = a.compare_exchange_strong(old_value, new_value);
    return remove_lock_bit(old_value);
  }

  /**
   * Take the write lock, spinning until no read or write lock is held.
   *
   * @return the metadata word observed before locking, lock bits cleared.
   */
  static uint64_t write_lock(std::atomic<uint64_t> &a) {
    uint64_t old_value, new_value;

    do {
      do {
        old_value = a.load();
      } while (is_read_locked(old_value) || is_write_locked(old_value));

      new_value = old_value + (WRITE_LOCK_BIT_MASK << WRITE_LOCK_BIT_OFFSET);

    } while (!a.compare_exchange_weak(old_value, new_value));
    return remove_lock_bit(old_value);
  }

  /** Drop one read lock; the word must be read locked and not write locked. */
  static void read_lock_release(std::atomic<uint64_t> &a) {
    uint64_t old_value, new_value;
    do {
      old_value = a.load();
      DCHECK(is_read_locked(old_value));
      DCHECK(!is_write_locked(old_value));
      new_value = old_value - (1ull << READ_LOCK_BIT_OFFSET);
    } while (!a.compare_exchange_weak(old_value, new_value));
  }

  /** Clear the write lock bit, leaving the rest of the word unchanged. */
  static void write_lock_release(std::atomic<uint64_t> &a) {
    uint64_t old_value, new_value;
    old_value = a.load();
    DCHECK(!is_read_locked(old_value));
    DCHECK(is_write_locked(old_value));
    new_value = old_value - (1ull << WRITE_LOCK_BIT_OFFSET);
    bool ok = a.compare_exchange_strong(old_value, new_value);
    DCHECK(ok);
  }

  /**
   * Release the write lock and install `new_value` as the metadata word.
   *
   * `new_value` must not carry any lock bit.
   */
  static void write_lock_release(std::atomic<uint64_t> &a, uint64_t new_value) {
    uint64_t old_value;
    old_value = a.load();
    DCHECK(!is_read_locked(old_value));
    DCHECK(is_write_locked(old_value));
    DCHECK(!is_read_locked(new_value));
    DCHECK(!is_write_locked(new_value));
    // A single, un-looped CAS must be the strong form: the weak form may fail
    // spuriously, which would leave the lock held when DCHECK is compiled out.
    bool ok = a.compare_exchange_strong(old_value, new_value);
    DCHECK(ok);
  }

  /** Clear the write lock bit and the read lock counter. */
  static uint64_t remove_lock_bit(uint64_t value) {
    return value & ~(LOCK_BIT_MASK << LOCK_BIT_OFFSET);
  }

  /** Clear only the read lock counter. */
  static uint64_t remove_read_lock_bit(uint64_t value) {
    return value & ~(READ_LOCK_BIT_MASK << READ_LOCK_BIT_OFFSET);
  }

  /** Clear only the write lock bit. */
  static uint64_t remove_write_lock_bit(uint64_t value) {
    return value & ~(WRITE_LOCK_BIT_MASK << WRITE_LOCK_BIT_OFFSET);
  }

public:
  // All lock bits: the top 10 bits of the word.
  static constexpr int LOCK_BIT_OFFSET = 54;
  static constexpr uint64_t LOCK_BIT_MASK = 0x3ffull;

  // Read lock counter: 9 bits starting at bit 54.
  static constexpr int READ_LOCK_BIT_OFFSET = 54;
  static constexpr uint64_t READ_LOCK_BIT_MASK = 0x1ffull;

  // Write lock flag: bit 63.
  static constexpr int WRITE_LOCK_BIT_OFFSET = 63;
  static constexpr uint64_t WRITE_LOCK_BIT_MASK = 0x1ull;
};
3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include "benchmark/tpcc/Context.h" 9 | #include "benchmark/tpcc/Database.h" 10 | #include "benchmark/tpcc/Random.h" 11 | #include "benchmark/tpcc/Storage.h" 12 | #include "benchmark/tpcc/Transaction.h" 13 | #include "core/Partitioner.h" 14 | 15 | namespace star { 16 | 17 | namespace tpcc { 18 | 19 | template class Workload { 20 | public: 21 | using TransactionType = Transaction; 22 | using DatabaseType = Database; 23 | using ContextType = Context; 24 | using RandomType = Random; 25 | using StorageType = Storage; 26 | 27 | Workload(std::size_t coordinator_id, DatabaseType &db, RandomType &random, 28 | Partitioner &partitioner) 29 | : coordinator_id(coordinator_id), db(db), random(random), 30 | partitioner(partitioner) {} 31 | 32 | static uint64_t next_transaction_id(uint64_t coordinator_id) { 33 | constexpr int coordinator_id_offset = 32; 34 | static std::atomic tid_static{1}; 35 | auto tid = tid_static.fetch_add(1); 36 | return (coordinator_id << coordinator_id_offset) | tid; 37 | } 38 | 39 | std::unique_ptr next_transaction(ContextType &context, 40 | std::size_t partition_id, 41 | std::size_t worker_id, 42 | std::size_t granule_id = 0) { 43 | 44 | int x = random.uniform_dist(1, 100); 45 | std::unique_ptr p; 46 | 47 | static std::atomic tid_cnt(0); 48 | long long transactionId = tid_cnt.fetch_add(1); 49 | auto random_seed = Time::now(); 50 | 51 | 52 | std::string transactionType; 53 | random.set_seed(random_seed); 54 | if (context.workloadType == TPCCWorkloadType::MIXED) { 55 | if (x <= 50) { 56 | p = std::make_unique>( 57 | coordinator_id, partition_id, db, context, random, partitioner); 58 | transactionType = "TPCC NewOrder"; 59 | } else { 60 | p = std::make_unique>(coordinator_id, partition_id, 61 | db, context, random, 62 | partitioner); 63 | transactionType = "TPCC Payment"; 64 | } 65 | } else if (context.workloadType == TPCCWorkloadType::NEW_ORDER_ONLY) { 66 | p = std::make_unique>(coordinator_id, 
partition_id, 67 | db, context, random, 68 | partitioner); 69 | transactionType = "TPCC NewOrder"; 70 | } else { 71 | p = std::make_unique>(coordinator_id, partition_id, 72 | db, context, random, 73 | partitioner); 74 | transactionType = "TPCC NewOrder"; 75 | } 76 | p->txn_random_seed_start = random_seed; 77 | p->transaction_id = next_transaction_id(coordinator_id); 78 | return p; 79 | } 80 | 81 | std::unique_ptr deserialize_from_raw(ContextType &context, const std::string & data) { 82 | Decoder decoder(data); 83 | uint64_t seed; 84 | uint32_t txn_type; 85 | std::size_t ith_replica; 86 | std::size_t partition_id; 87 | int64_t transaction_id; 88 | uint64_t straggler_wait_time; 89 | decoder >> transaction_id >> txn_type >> straggler_wait_time >> ith_replica >> seed >> partition_id; 90 | RandomType random; 91 | random.set_seed(seed); 92 | 93 | if (txn_type == 0) { 94 | auto p = std::make_unique>( 95 | coordinator_id, partition_id, db, context, random, partitioner, 96 | ith_replica); 97 | p->txn_random_seed_start = seed; 98 | p->transaction_id = transaction_id; 99 | p->straggler_wait_time = straggler_wait_time; 100 | p->deserialize_lock_status(decoder); 101 | return p; 102 | } else { 103 | auto p = std::make_unique>(coordinator_id, partition_id, 104 | db, context, random, 105 | partitioner, ith_replica); 106 | p->txn_random_seed_start = seed; 107 | p->transaction_id = transaction_id; 108 | p->straggler_wait_time = straggler_wait_time; 109 | p->deserialize_lock_status(decoder); 110 | return p; 111 | } 112 | } 113 | 114 | private: 115 | std::size_t coordinator_id; 116 | DatabaseType &db; 117 | RandomType &random; 118 | Partitioner &partitioner; 119 | }; 120 | 121 | } // namespace tpcc 122 | } // namespace star 123 | -------------------------------------------------------------------------------- /benchmark/ycsb/Workload.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 7/25/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include "benchmark/tpcc/Context.h" 8 | #include "benchmark/ycsb/Database.h" 9 | #include "benchmark/ycsb/Random.h" 10 | #include "benchmark/ycsb/Storage.h" 11 | #include "benchmark/ycsb/Transaction.h" 12 | #include "core/Partitioner.h" 13 | 14 | namespace star { 15 | 16 | namespace ycsb { 17 | 18 | template class Workload { 19 | public: 20 | using TransactionType = Transaction; 21 | using DatabaseType = Database; 22 | using ContextType = Context; 23 | using RandomType = Random; 24 | using StorageType = Storage; 25 | 26 | Workload(std::size_t coordinator_id, DatabaseType &db, RandomType &random, 27 | Partitioner &partitioner) 28 | : coordinator_id(coordinator_id), db(db), random(random), 29 | partitioner(partitioner) {} 30 | 31 | 32 | static uint64_t next_transaction_id(uint64_t coordinator_id, uint64_t cluster_worker_id) { 33 | constexpr int coordinator_id_offset = 40; 34 | constexpr int worker_id_offset = 32; 35 | static std::atomic tid_static{1}; 36 | auto tid = tid_static.fetch_add(1); 37 | return (coordinator_id << coordinator_id_offset) | (cluster_worker_id << worker_id_offset) | tid; 38 | } 39 | 40 | std::unique_ptr next_transaction(ContextType &context, 41 | std::size_t partition_id, 42 | std::size_t worker_id, 43 | std::size_t granule_id = 0) { 44 | // const static uint32_t num_workers_per_node = context.partition_num / context.coordinator_num; 45 | // int cluster_worker_id = coordinator_id * num_workers_per_node + worker_id; 46 | // if (cluster_worker_id == 1) { 47 | // context.crossPartitionProbability = 100; 48 | // } 49 | 50 | static std::atomic tid_cnt(0); 51 | long long transactionId = tid_cnt.fetch_add(1); 52 | auto random_seed = Time::now(); 53 | random.set_seed(random_seed); 54 | std::unique_ptr p = 55 | std::make_unique>( 56 | coordinator_id, partition_id, granule_id, db, context, random, partitioner); 57 | p->txn_random_seed_start = random_seed; 58 | p->transaction_id = next_transaction_id(coordinator_id, 
worker_id); 59 | return p; 60 | } 61 | 62 | std::unique_ptr deserialize_from_raw(ContextType &context, const std::string & data) { 63 | Decoder decoder(data); 64 | uint64_t seed; 65 | std::size_t ith_replica; 66 | std::size_t partition_id; 67 | std::size_t granule_id; 68 | int32_t partition_count; 69 | int64_t transaction_id; 70 | uint64_t straggler_wait_time; 71 | 72 | // std::vector partitions_from_command, granules_from_command; 73 | // int32_t granule_count = 0; 74 | decoder >> transaction_id >> straggler_wait_time >> ith_replica >> seed >> partition_id >> granule_id >> partition_count; 75 | // for (int32_t i = 0; i < partition_count; ++i){ 76 | // int32_t p; 77 | // decoder >> p; 78 | // partitions_from_command.push_back(p); 79 | // } 80 | // decoder >> granule_count; 81 | // for (int32_t i = 0; i < granule_count; ++i){ 82 | // int32_t g; 83 | // decoder >> g; 84 | // granules_from_command.push_back(g); 85 | // } 86 | RandomType random; 87 | random.set_seed(seed); 88 | 89 | std::unique_ptr p = 90 | std::make_unique>( 91 | coordinator_id, partition_id, granule_id, db, context, random, partitioner, ith_replica); 92 | p->txn_random_seed_start = seed; 93 | DCHECK(p->get_partition_count() == partition_count); 94 | // std::vector partitions, granules; 95 | // for (int32_t i = 0; i < partition_count; ++i){ 96 | // partitions.push_back(p->get_partition(i)); 97 | // for (int32_t j = 0; j < p->get_partition_granule_count(i); ++j) { 98 | // granules.push_back(p->get_granule(i, j)); 99 | // } 100 | // } 101 | // sort(granules.begin(), granules.end()); 102 | // sort(partitions.begin(), partitions.end()); 103 | // sort(partitions_from_command.begin(), partitions_from_command.end()); 104 | // sort(granules_from_command.begin(), granules_from_command.end()); 105 | // DCHECK(granules == granules_from_command); 106 | // DCHECK(partitions == partitions_from_command); 107 | p->transaction_id = transaction_id; 108 | p->straggler_wait_time = straggler_wait_time; 109 | 
/**
 * One entry of a Sundial transaction's read/write set.
 *
 * Flags, table id and partition id are packed into `bitvec`; the key and
 * value pointers, tid, rts/wts timestamps and the read-set position are
 * stored as plain fields.
 */
class SundialRWKey {
public:
  // local index read bit

  void set_local_index_read_bit() {
    clear_local_index_read_bit();
    bitvec |= LOCAL_INDEX_READ_BIT_MASK << LOCAL_INDEX_READ_BIT_OFFSET;
  }

  void clear_local_index_read_bit() {
    bitvec &= ~(LOCAL_INDEX_READ_BIT_MASK << LOCAL_INDEX_READ_BIT_OFFSET);
  }

  uint64_t get_local_index_read_bit() const {
    return (bitvec >> LOCAL_INDEX_READ_BIT_OFFSET) & LOCAL_INDEX_READ_BIT_MASK;
  }

  // read request bit

  void set_read_request_bit() {
    clear_read_request_bit();
    bitvec |= READ_REQUEST_BIT_MASK << READ_REQUEST_BIT_OFFSET;
  }

  void clear_read_request_bit() {
    bitvec &= ~(READ_REQUEST_BIT_MASK << READ_REQUEST_BIT_OFFSET);
  }

  uint64_t get_read_request_bit() const {
    return (bitvec >> READ_REQUEST_BIT_OFFSET) & READ_REQUEST_BIT_MASK;
  }

  // write request bit

  void set_write_request_bit() {
    clear_write_request_bit();
    bitvec |= WRITE_REQUEST_BIT_MASK << WRITE_REQUEST_BIT_OFFSET;
  }

  void clear_write_request_bit() {
    bitvec &= ~(WRITE_REQUEST_BIT_MASK << WRITE_REQUEST_BIT_OFFSET);
  }

  uint64_t get_write_request_bit() const {
    return (bitvec >> WRITE_REQUEST_BIT_OFFSET) & WRITE_REQUEST_BIT_MASK;
  }

  // write lock bit

  void set_write_lock_bit() {
    clear_write_lock_bit();
    bitvec |= WRITE_LOCK_BIT_MASK << WRITE_LOCK_BIT_OFFSET;
  }

  void clear_write_lock_bit() {
    bitvec &= ~(WRITE_LOCK_BIT_MASK << WRITE_LOCK_BIT_OFFSET);
  }

  bool get_write_lock_bit() const {
    return (bitvec >> WRITE_LOCK_BIT_OFFSET) & WRITE_LOCK_BIT_MASK;
  }

  // table id (5 bits)

  void set_table_id(uint64_t table_id) {
    DCHECK(table_id < (1 << 5));
    clear_table_id();
    bitvec |= table_id << TABLE_ID_OFFSET;
  }

  void clear_table_id() { bitvec &= ~(TABLE_ID_MASK << TABLE_ID_OFFSET); }

  uint64_t get_table_id() const {
    return (bitvec >> TABLE_ID_OFFSET) & TABLE_ID_MASK;
  }

  // partition id (32 bits)

  void set_partition_id(uint64_t partition_id) {
    DCHECK(partition_id < (1ULL << 32));
    clear_partition_id();
    bitvec |= partition_id << PARTITION_ID_OFFSET;
  }

  void clear_partition_id() {
    bitvec &= ~(PARTITION_ID_MASK << PARTITION_ID_OFFSET);
  }

  uint64_t get_partition_id() const {
    return (bitvec >> PARTITION_ID_OFFSET) & PARTITION_ID_MASK;
  }

  // tid
  uint64_t get_tid() const { return tid; }

  void set_tid(uint64_t tid) { this->tid = tid; }

  // key
  void set_key(const void *key) { this->key = key; }

  const void *get_key() const { return key; }

  // value
  void set_value(void *value) { this->value = value; }

  void *get_value() const { return value; }

  // read / write timestamps
  uint64_t get_rts() const { return rts; }

  void set_rts(uint64_t rts) { this->rts = rts; }

  uint64_t get_wts() const { return wts; }

  void set_wts(uint64_t wts) { this->wts = wts; }

  // position in the read set; -1 means "not set"
  int get_read_set_pos() { return read_set_pos; }

  void set_read_set_pos(int32_t pos) {
    // The position may only be assigned once. This must be a comparison: an
    // assignment here would always pass and silently reset the field.
    DCHECK(this->read_set_pos == -1);
    this->read_set_pos = pos;
  }

private:
  /*
   * bitvec is a 64-bit word. From the constants below:
   *
   *   bits 51-55  table id (5 bits)
   *   bits 19-50  partition id (32 bits)
   *   bit  3      write lock bit
   *   bit  2      write request bit
   *   bit  1      read request bit
   *   bit  0      local index read bit
   *
   * The remaining bits (4-18 and 56-63) are unused.
   *
   * write lock bit is set when a write lock is acquired.
   * read request bit is set when the read response is received.
   * local index read is set when the read is from a local read only index.
   *
   */

  uint64_t bitvec = 0;
  uint64_t tid = 0;
  const void *key = nullptr;
  void *value = nullptr;
  uint64_t rts = 0;
  uint64_t wts = 0;
  int32_t read_set_pos = -1;

public:
  static constexpr uint64_t TABLE_ID_MASK = 0x1f;
  static constexpr uint64_t TABLE_ID_OFFSET = 27 + 24;

  static constexpr uint64_t PARTITION_ID_MASK = 0xffffffff;
  static constexpr uint64_t PARTITION_ID_OFFSET = 19;

  static constexpr uint64_t WRITE_LOCK_BIT_MASK = 0x1;
  static constexpr uint64_t WRITE_LOCK_BIT_OFFSET = 3;

  static constexpr uint64_t WRITE_REQUEST_BIT_MASK = 0x1;
  static constexpr uint64_t WRITE_REQUEST_BIT_OFFSET = 2;

  static constexpr uint64_t READ_REQUEST_BIT_MASK = 0x1;
  static constexpr uint64_t READ_REQUEST_BIT_OFFSET = 1;

  static constexpr uint64_t LOCAL_INDEX_READ_BIT_MASK = 0x1;
  static constexpr uint64_t LOCAL_INDEX_READ_BIT_OFFSET = 0;
};
3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | namespace star { 14 | 15 | class CalvinHelper { 16 | 17 | public: 18 | using MetaDataType = std::atomic; 19 | 20 | static std::vector string_to_vint(const std::string &str) { 21 | std::vector vstr; 22 | boost::algorithm::split(vstr, str, boost::is_any_of(",")); 23 | std::vector vint; 24 | for (auto i = 0u; i < vstr.size(); i++) { 25 | vint.push_back(std::atoi(vstr[i].c_str())); 26 | } 27 | return vint; 28 | } 29 | 30 | static std::size_t 31 | n_lock_manager(std::size_t replica_group_id, std::size_t id, 32 | const std::vector &lock_managers) { 33 | CHECK(replica_group_id < lock_managers.size()); 34 | return lock_managers[replica_group_id]; 35 | } 36 | 37 | // assume there are n = 2 lock managers and m = 4 workers 38 | // the following function maps 39 | // (2, 2, 4) => 0 40 | // (3, 2, 4) => 0 41 | // (4, 2, 4) => 1 42 | // (5, 2, 4) => 1 43 | 44 | static std::size_t worker_id_to_lock_manager_id(std::size_t id, 45 | std::size_t n_lock_manager, 46 | std::size_t n_worker) { 47 | if (id < n_lock_manager) { 48 | return id; 49 | } 50 | return (id - n_lock_manager) / (n_worker / n_lock_manager); 51 | } 52 | 53 | // assume the replication group size is 3 and we have partitions 0..8 54 | // the 1st coordinator has partition 0, 3, 6. 55 | // the 2nd coordinator has partition 1, 4, 7. 56 | // the 3rd coordinator has partition 2, 5, 8. 57 | // the function first maps all partition id to 0, 1, 2 and then use % hash to 58 | // assign each partition to a lock manager. 
59 | 60 | static std::size_t 61 | partition_id_to_lock_manager_id(std::size_t partition_id, 62 | std::size_t n_lock_manager, 63 | std::size_t replica_group_size) { 64 | return partition_id / replica_group_size % n_lock_manager; 65 | } 66 | 67 | static void read(const std::tuple &row, void *dest, 68 | std::size_t size) { 69 | 70 | MetaDataType &tid = *std::get<0>(row); 71 | void *src = std::get<1>(row); 72 | std::memcpy(dest, src, size); 73 | } 74 | 75 | /** 76 | * 77 | * The following code is adapted from TwoPLHelper.h 78 | * For Calvin, we can use lower 63 bits for read locks. 79 | * However, 511 locks are enough and the code above is well tested. 80 | * 81 | * [write lock bit (1) | read lock bit (9) -- 512 - 1 locks ] 82 | * 83 | */ 84 | 85 | static bool is_read_locked(uint64_t value) { 86 | return value & (READ_LOCK_BIT_MASK << READ_LOCK_BIT_OFFSET); 87 | } 88 | 89 | static bool is_write_locked(uint64_t value) { 90 | return value & (WRITE_LOCK_BIT_MASK << WRITE_LOCK_BIT_OFFSET); 91 | } 92 | 93 | static uint64_t read_lock_num(uint64_t value) { 94 | return (value >> READ_LOCK_BIT_OFFSET) & READ_LOCK_BIT_MASK; 95 | } 96 | 97 | static uint64_t read_lock_max() { return READ_LOCK_BIT_MASK; } 98 | 99 | static uint64_t read_lock(std::atomic &a) { 100 | uint64_t old_value, new_value; 101 | do { 102 | do { 103 | old_value = a.load(); 104 | } while (is_write_locked(old_value) || 105 | read_lock_num(old_value) == read_lock_max()); 106 | new_value = old_value + (1ull << READ_LOCK_BIT_OFFSET); 107 | } while (!a.compare_exchange_weak(old_value, new_value)); 108 | return remove_lock_bit(old_value); 109 | } 110 | 111 | static uint64_t write_lock(std::atomic &a) { 112 | uint64_t old_value, new_value; 113 | 114 | do { 115 | do { 116 | old_value = a.load(); 117 | } while (is_read_locked(old_value) || is_write_locked(old_value)); 118 | 119 | new_value = old_value + (WRITE_LOCK_BIT_MASK << WRITE_LOCK_BIT_OFFSET); 120 | 121 | } while (!a.compare_exchange_weak(old_value, new_value)); 
122 | return remove_lock_bit(old_value); 123 | } 124 | 125 | static void read_lock_release(std::atomic &a) { 126 | uint64_t old_value, new_value; 127 | do { 128 | old_value = a.load(); 129 | DCHECK(is_read_locked(old_value)); 130 | DCHECK(!is_write_locked(old_value)); 131 | new_value = old_value - (1ull << READ_LOCK_BIT_OFFSET); 132 | } while (!a.compare_exchange_weak(old_value, new_value)); 133 | } 134 | 135 | static void write_lock_release(std::atomic &a) { 136 | uint64_t old_value, new_value; 137 | old_value = a.load(); 138 | DCHECK(!is_read_locked(old_value)); 139 | DCHECK(is_write_locked(old_value)); 140 | new_value = old_value - (1ull << WRITE_LOCK_BIT_OFFSET); 141 | bool ok = a.compare_exchange_strong(old_value, new_value); 142 | DCHECK(ok); 143 | } 144 | 145 | static uint64_t remove_lock_bit(uint64_t value) { 146 | return value & ~(LOCK_BIT_MASK << LOCK_BIT_OFFSET); 147 | } 148 | 149 | static uint64_t remove_read_lock_bit(uint64_t value) { 150 | return value & ~(READ_LOCK_BIT_MASK << READ_LOCK_BIT_OFFSET); 151 | } 152 | 153 | static uint64_t remove_write_lock_bit(uint64_t value) { 154 | return value & ~(WRITE_LOCK_BIT_MASK << WRITE_LOCK_BIT_OFFSET); 155 | } 156 | 157 | public: 158 | static constexpr int LOCK_BIT_OFFSET = 54; 159 | static constexpr uint64_t LOCK_BIT_MASK = 0x3ffull; 160 | 161 | static constexpr int READ_LOCK_BIT_OFFSET = 54; 162 | static constexpr uint64_t READ_LOCK_BIT_MASK = 0x1ffull; 163 | 164 | static constexpr int WRITE_LOCK_BIT_OFFSET = 63; 165 | static constexpr uint64_t WRITE_LOCK_BIT_MASK = 0x1ull; 166 | }; 167 | } // namespace star -------------------------------------------------------------------------------- /common/Socket.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 7/24/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | namespace star { 18 | 19 | class Socket { 20 | 21 | public: 22 | Socket() : quick_ack(false) { 23 | fd = socket(AF_INET, SOCK_STREAM, 0); 24 | DCHECK(fd >= 0); 25 | } 26 | 27 | Socket(int fd) : quick_ack(false), fd(fd) {} 28 | 29 | // Socket is not copyable 30 | Socket(const Socket &) = delete; 31 | 32 | Socket &operator=(const Socket &) = delete; 33 | 34 | // Socket is movable 35 | Socket(Socket &&that) { 36 | quick_ack = that.quick_ack; 37 | 38 | DCHECK(that.fd >= 0); 39 | fd = that.fd; 40 | that.fd = -1; 41 | } 42 | 43 | Socket &operator=(Socket &&that) { 44 | quick_ack = that.quick_ack; 45 | 46 | DCHECK(that.fd >= 0); 47 | fd = that.fd; 48 | that.fd = -1; 49 | return *this; 50 | } 51 | 52 | int connect(const char *addr, int port) { 53 | DCHECK(fd >= 0); 54 | sockaddr_in serv = make_endpoint(addr, port); 55 | return ::connect(fd, (const sockaddr *)(&serv), sizeof(serv)); 56 | } 57 | 58 | void disable_nagle_algorithm() { 59 | LOG(INFO) << "Disabling nagle"; 60 | DCHECK(fd >= 0); 61 | // disable Nagle's algorithm 62 | int flag = 1; 63 | int res = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &flag, sizeof(int)); 64 | CHECK(res >= 0); 65 | } 66 | 67 | void set_quick_ack_flag(bool quick_ack) { this->quick_ack = quick_ack; } 68 | 69 | void try_quick_ack() { 70 | #ifndef __APPLE__ 71 | if (quick_ack) { 72 | DCHECK(fd >= 0); 73 | int flag = 1; 74 | int res = setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &flag, sizeof(int)); 75 | CHECK(res >= 0); 76 | } 77 | #endif 78 | } 79 | 80 | int close() { 81 | DCHECK(fd >= 0); 82 | return ::close(fd); 83 | } 84 | 85 | long read_n_bytes(char *buf, long size) { 86 | DCHECK(fd >= 0); 87 | long n = 0; 88 | while (n < size) { 89 | long bytes_read = read(buf + n, size - n); 90 | if (bytes_read == 0) { 91 | CHECK(n == 0); // no partial reading is support 92 | return 0; // remote 
socket is closed. 93 | } 94 | n += bytes_read; 95 | } 96 | return n; 97 | } 98 | 99 | long read_n_bytes_async(char *buf, long size) { 100 | DCHECK(fd >= 0); 101 | long n = 0; 102 | while (n < size) { 103 | long bytes_read = read_async(buf + n, size - n); 104 | if (bytes_read == -1) { // non blocking 105 | CHECK(errno == EWOULDBLOCK || errno == EAGAIN); 106 | if (n == 0) 107 | return -1; 108 | else 109 | continue; 110 | } 111 | if (bytes_read == 0) { 112 | CHECK(n == 0); // no partial reading is support 113 | return 0; // remote socket is closed. 114 | } 115 | n += bytes_read; 116 | } 117 | return n; 118 | } 119 | 120 | long write_n_bytes(const char *buf, long size) { 121 | DCHECK(fd >= 0); 122 | long n = 0; 123 | while (n < size) { 124 | long bytes_written = write(buf + n, size - n); 125 | n += bytes_written; 126 | } 127 | return n; 128 | } 129 | 130 | template long write_number(const T &n) { 131 | DCHECK(fd >= 0); 132 | return write_n_bytes(reinterpret_cast(&n), sizeof(T)); 133 | } 134 | 135 | template long read_number(T &n) { 136 | DCHECK(fd >= 0); 137 | return read_n_bytes(reinterpret_cast(&n), sizeof(T)); 138 | } 139 | 140 | template long read_number_async(T &n) { 141 | DCHECK(fd >= 0); 142 | return read_n_bytes_async(reinterpret_cast(&n), sizeof(T)); 143 | } 144 | 145 | long read(char *buf, long size) { 146 | DCHECK(fd >= 0); 147 | if (size > 0) { 148 | long recv_size = recv(fd, buf, size, 0); 149 | try_quick_ack(); 150 | return recv_size; 151 | } 152 | return 0; 153 | } 154 | 155 | long read_async(char *buf, long size) { 156 | DCHECK(fd >= 0); 157 | if (size > 0) { 158 | long recv_size = recv(fd, buf, size, MSG_DONTWAIT); 159 | try_quick_ack(); 160 | return recv_size; 161 | } 162 | return 0; 163 | } 164 | 165 | long write(const char *buf, long size) { 166 | DCHECK(fd >= 0); 167 | if (size > 0) { 168 | return send(fd, buf, size, 0); 169 | } 170 | return 0; 171 | } 172 | 173 | static sockaddr_in make_endpoint(const char *addr, int port) { 174 | sockaddr_in 
serv; 175 | memset(&serv, 0, sizeof(serv)); 176 | 177 | serv.sin_family = AF_INET; 178 | serv.sin_addr.s_addr = inet_addr(addr); 179 | serv.sin_port = htons(port); // convert to big-endian order 180 | return serv; 181 | } 182 | 183 | private: 184 | bool quick_ack = false; 185 | int fd; 186 | }; 187 | 188 | class Listener { 189 | public: 190 | Listener(const char *addr, int port, int max_connections) { 191 | fd = socket(AF_INET, SOCK_STREAM, 0); 192 | CHECK(fd >= 0); 193 | bind(addr, port); 194 | listen(max_connections); 195 | } 196 | 197 | Socket accept() { 198 | int acc_fd = ::accept(fd, 0, 0); 199 | CHECK(acc_fd >= 0); 200 | return Socket(acc_fd); 201 | } 202 | 203 | int close() { return ::close(fd); } 204 | 205 | private: 206 | void bind(const char *addr, int port) { 207 | sockaddr_in serv = Socket::make_endpoint(addr, port); 208 | int enable = 1; 209 | int ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int)); 210 | if (ret < 0) { 211 | LOG(FATAL) << "setsockopt failed " << strerror(errno); 212 | } 213 | CHECK(ret >= 0); 214 | ret = ::bind(fd, (sockaddr *)(&serv), sizeof(serv)); 215 | if (ret < 0) { 216 | LOG(FATAL) << "bind " << addr << ":" <= 0); 219 | } 220 | 221 | void listen(int max_connections) { ::listen(fd, max_connections); } 222 | 223 | private: 224 | int fd; 225 | }; 226 | } // namespace star 227 | -------------------------------------------------------------------------------- /protocol/TwoPL/TwoPLRWKey.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Xinjing on 9/12/21. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | namespace star { 14 | 15 | class TwoPLRWKey { 16 | public: 17 | // local index read bit 18 | 19 | void set_local_index_read_bit() { 20 | clear_local_index_read_bit(); 21 | bitvec |= LOCAL_INDEX_READ_BIT_MASK << LOCAL_INDEX_READ_BIT_OFFSET; 22 | } 23 | 24 | void clear_local_index_read_bit() { 25 | bitvec &= ~(LOCAL_INDEX_READ_BIT_MASK << LOCAL_INDEX_READ_BIT_OFFSET); 26 | } 27 | 28 | uint64_t get_local_index_read_bit() const { 29 | return (bitvec >> LOCAL_INDEX_READ_BIT_OFFSET) & LOCAL_INDEX_READ_BIT_MASK; 30 | } 31 | 32 | // read lock bit 33 | 34 | void set_read_lock_bit() { 35 | clear_read_lock_bit(); 36 | bitvec |= READ_LOCK_BIT_MASK << READ_LOCK_BIT_OFFSET; 37 | } 38 | 39 | void clear_read_lock_bit() { 40 | bitvec &= ~(READ_LOCK_BIT_MASK << READ_LOCK_BIT_OFFSET); 41 | } 42 | 43 | uint64_t get_read_lock_bit() const { 44 | return (bitvec >> READ_LOCK_BIT_OFFSET) & READ_LOCK_BIT_MASK; 45 | } 46 | 47 | // write lock bit 48 | 49 | void set_write_lock_bit() { 50 | clear_write_lock_bit(); 51 | bitvec |= WRITE_LOCK_BIT_MASK << WRITE_LOCK_BIT_OFFSET; 52 | } 53 | 54 | void clear_write_lock_bit() { 55 | bitvec &= ~(WRITE_LOCK_BIT_MASK << WRITE_LOCK_BIT_OFFSET); 56 | } 57 | 58 | uint64_t get_write_lock_bit() const { 59 | return (bitvec >> WRITE_LOCK_BIT_OFFSET) & WRITE_LOCK_BIT_MASK; 60 | } 61 | 62 | // read lock request bit 63 | 64 | void set_read_lock_request_bit() { 65 | clear_read_lock_request_bit(); 66 | bitvec |= READ_LOCK_REQUEST_BIT_MASK << READ_LOCK_REQUEST_BIT_OFFSET; 67 | } 68 | 69 | void clear_read_lock_request_bit() { 70 | bitvec &= ~(READ_LOCK_REQUEST_BIT_MASK << READ_LOCK_REQUEST_BIT_OFFSET); 71 | } 72 | 73 | uint64_t get_read_lock_request_bit() const { 74 | return (bitvec >> READ_LOCK_REQUEST_BIT_OFFSET) & 75 | READ_LOCK_REQUEST_BIT_MASK; 76 | } 77 | 78 | // write lock request bit 79 | 80 | void set_write_lock_request_bit() { 81 | 
clear_write_lock_request_bit(); 82 | bitvec |= WRITE_LOCK_REQUEST_BIT_MASK << WRITE_LOCK_REQUEST_BIT_OFFSET; 83 | } 84 | 85 | void clear_write_lock_request_bit() { 86 | bitvec &= ~(WRITE_LOCK_REQUEST_BIT_MASK << WRITE_LOCK_REQUEST_BIT_OFFSET); 87 | } 88 | 89 | uint64_t get_write_lock_request_bit() const { 90 | return (bitvec >> WRITE_LOCK_REQUEST_BIT_OFFSET) & 91 | WRITE_LOCK_REQUEST_BIT_MASK; 92 | } 93 | 94 | // table id 95 | 96 | void set_table_id(uint64_t table_id) { 97 | DCHECK(table_id < (1 << 5)); 98 | clear_table_id(); 99 | bitvec |= table_id << TABLE_ID_OFFSET; 100 | } 101 | 102 | void clear_table_id() { bitvec &= ~(TABLE_ID_MASK << TABLE_ID_OFFSET); } 103 | 104 | uint64_t get_table_id() const { 105 | return (bitvec >> TABLE_ID_OFFSET) & TABLE_ID_MASK; 106 | } 107 | // partition id 108 | 109 | void set_partition_id(uint64_t partition_id) { 110 | DCHECK(partition_id < (1ULL << 20)); 111 | clear_partition_id(); 112 | bitvec |= partition_id << PARTITION_ID_OFFSET; 113 | } 114 | 115 | void clear_partition_id() { 116 | bitvec &= ~(PARTITION_ID_MASK << PARTITION_ID_OFFSET); 117 | } 118 | 119 | uint64_t get_partition_id() const { 120 | return (bitvec >> PARTITION_ID_OFFSET) & PARTITION_ID_MASK; 121 | } 122 | 123 | // granule 124 | void set_granule_id(uint64_t granule_id) { 125 | DCHECK(granule_id < (1ULL << 17)); 126 | clear_granule_id(); 127 | bitvec |= granule_id << GRANULE_ID_OFFSET; 128 | } 129 | 130 | void clear_granule_id() { 131 | bitvec &= ~(GRANULE_ID_MASK << GRANULE_ID_OFFSET); 132 | } 133 | 134 | uint64_t get_granule_id() const { 135 | return (bitvec >> GRANULE_ID_OFFSET) & GRANULE_ID_MASK; 136 | } 137 | 138 | // tid 139 | uint64_t get_tid() const { return tid; } 140 | 141 | void set_tid(uint64_t tid) { this->tid = tid; } 142 | 143 | // key 144 | void set_key(const void *key) { this->key = key; } 145 | 146 | const void *get_key() const { return key; } 147 | 148 | // value 149 | void set_value(void *value) { this->value = value; } 150 | 151 | void 
*get_value() const { return value; } 152 | 153 | void set_lock_index(std::uint32_t lock_index) { 154 | this->lock_index = lock_index; 155 | } 156 | 157 | int32_t get_lock_index() { 158 | return lock_index; 159 | } 160 | private: 161 | /* 162 | * A bitvec is a 64-bit word. 163 | * 164 | * [ table id (5) ] | partition id (32) | unused bit (14) | 165 | * write lock request bit (1) | read lock request bit (1) 166 | * write lock bit(1) | read lock bit (1) | local index read (1) ] 167 | * 168 | * 169 | * local index read is set when the read is from a local read only index. 170 | * write lock bit is set when a write lock is acquired. 171 | * read lock bit is set when a read lock is acquired. 172 | * write lock request bit is set when a write lock request is needed. 173 | * read lock request bit is set when a read lock request is needed. 174 | * 175 | */ 176 | const void *key = nullptr; 177 | void *value = nullptr; 178 | uint64_t bitvec = 0; 179 | uint64_t tid = 0; 180 | int32_t lock_index = -1; 181 | public: 182 | static constexpr uint64_t TABLE_ID_MASK = 0x1f; 183 | static constexpr uint64_t TABLE_ID_OFFSET = 57; 184 | 185 | static constexpr uint64_t GRANULE_ID_MASK = 0x3ffff; 186 | static constexpr uint64_t GRANULE_ID_OFFSET = 39; 187 | 188 | static constexpr uint64_t PARTITION_ID_MASK = 0xfffff; 189 | static constexpr uint64_t PARTITION_ID_OFFSET = 19; 190 | 191 | static constexpr uint64_t WRITE_LOCK_REQUEST_BIT_MASK = 0x1; 192 | static constexpr uint64_t WRITE_LOCK_REQUEST_BIT_OFFSET = 4; 193 | 194 | static constexpr uint64_t READ_LOCK_REQUEST_BIT_MASK = 0x1; 195 | static constexpr uint64_t READ_LOCK_REQUEST_BIT_OFFSET = 3; 196 | 197 | static constexpr uint64_t WRITE_LOCK_BIT_MASK = 0x1; 198 | static constexpr uint64_t WRITE_LOCK_BIT_OFFSET = 2; 199 | 200 | static constexpr uint64_t READ_LOCK_BIT_MASK = 0x1; 201 | static constexpr uint64_t READ_LOCK_BIT_OFFSET = 1; 202 | 203 | static constexpr uint64_t LOCAL_INDEX_READ_BIT_MASK = 0x1; 204 | static constexpr uint64_t 
LOCAL_INDEX_READ_BIT_OFFSET = 0; 205 | }; 206 | } // namespace star 207 | -------------------------------------------------------------------------------- /protocol/Star/StarManager.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 9/6/18. 3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | #include "common/Percentile.h" 10 | #include "core/Manager.h" 11 | 12 | namespace star { 13 | 14 | class StarManager : public star::Manager { 15 | public: 16 | using base_type = star::Manager; 17 | 18 | StarManager(std::size_t coordinator_id, std::size_t id, 19 | const Context &context, std::atomic &stopFlag) 20 | : base_type(coordinator_id, id, context, stopFlag) { 21 | LOG(INFO) << "batch_size " << context.batch_size; 22 | batch_size = context.batch_size; 23 | } 24 | 25 | ExecutorStatus merge_value_to_signal(uint32_t value, ExecutorStatus signal) { 26 | // the value is put into the most significant 24 bits 27 | uint32_t offset = 8; 28 | return static_cast((value << offset) | 29 | static_cast(signal)); 30 | } 31 | 32 | std::tuple split_signal(ExecutorStatus signal) { 33 | // the value is put into the most significant 24 bits 34 | uint32_t offset = 8, mask = 0xff; 35 | uint32_t value = static_cast(signal); 36 | // return value and ``real" signal 37 | return std::make_tuple(value >> offset, 38 | static_cast(value & mask)); 39 | } 40 | 41 | void update_batch_size(uint64_t running_time) { 42 | // running_time in microseconds 43 | // context.group_time in ms 44 | batch_size = batch_size * (context.group_time * 1000) / running_time; 45 | 46 | if (batch_size % 10 != 0) { 47 | batch_size += (10 - batch_size % 10); 48 | } 49 | } 50 | 51 | void signal_worker(ExecutorStatus status) { 52 | 53 | // only the coordinator node calls this function 54 | DCHECK(coordinator_id == 0); 55 | std::tuple split = split_signal(status); 56 | set_worker_status(std::get<1>(split)); 57 | 58 | // signal to everyone 59 | for (auto i = 0u; i < 
context.coordinator_num; i++) { 60 | if (i == coordinator_id) { 61 | continue; 62 | } 63 | ControlMessageFactory::new_signal_message(*messages[i], 64 | static_cast(status)); 65 | } 66 | flush_messages(); 67 | } 68 | 69 | void coordinator_start() override { 70 | 71 | std::size_t n_workers = context.worker_num; 72 | std::size_t n_coordinators = context.coordinator_num; 73 | 74 | Percentile all_percentile, c_percentile, s_percentile, 75 | batch_size_percentile; 76 | 77 | while (!stopFlag.load()) { 78 | 79 | int64_t ack_wait_time_c = 0, ack_wait_time_s = 0; 80 | auto c_start = std::chrono::steady_clock::now(); 81 | // start c-phase 82 | // LOG(INFO) << "start C-Phase"; 83 | 84 | n_completed_workers.store(0); 85 | n_started_workers.store(0); 86 | batch_size_percentile.add(batch_size); 87 | signal_worker(merge_value_to_signal(batch_size, ExecutorStatus::C_PHASE)); 88 | wait_all_workers_start(); 89 | wait_all_workers_finish(); 90 | set_worker_status(ExecutorStatus::STOP); 91 | broadcast_stop(); 92 | wait4_ack(); 93 | 94 | { 95 | auto now = std::chrono::steady_clock::now(); 96 | c_percentile.add( 97 | std::chrono::duration_cast(now - c_start) 98 | .count()); 99 | } 100 | 101 | auto s_start = std::chrono::steady_clock::now(); 102 | // start s-phase 103 | 104 | // LOG(INFO) << "start S-Phase"; 105 | 106 | n_completed_workers.store(0); 107 | n_started_workers.store(0); 108 | signal_worker(ExecutorStatus::S_PHASE); 109 | wait_all_workers_start(); 110 | wait_all_workers_finish(); 111 | broadcast_stop(); 112 | wait4_stop(n_coordinators - 1); 113 | n_completed_workers.store(0); 114 | set_worker_status(ExecutorStatus::STOP); 115 | wait_all_workers_finish(); 116 | wait4_ack(); 117 | { 118 | auto now = std::chrono::steady_clock::now(); 119 | 120 | s_percentile.add( 121 | std::chrono::duration_cast(now - s_start) 122 | .count()); 123 | 124 | auto all_time = 125 | std::chrono::duration_cast(now - c_start) 126 | .count(); 127 | 128 | all_percentile.add(all_time); 129 | if 
(context.star_dynamic_batch_size) { 130 | update_batch_size(all_time); 131 | } 132 | } 133 | } 134 | 135 | signal_worker(ExecutorStatus::EXIT); 136 | 137 | LOG(INFO) << "Average phase switch length " << all_percentile.nth(50) 138 | << " us, average c phase length " << c_percentile.nth(50) 139 | << " us, average s phase length " << s_percentile.nth(50) 140 | << " us, average batch size " << batch_size_percentile.nth(50) 141 | << " ."; 142 | } 143 | 144 | void non_coordinator_start() override { 145 | 146 | std::size_t n_workers = context.worker_num; 147 | std::size_t n_coordinators = context.coordinator_num; 148 | 149 | for (;;) { 150 | 151 | ExecutorStatus signal; 152 | std::tie(batch_size, signal) = split_signal(wait4_signal()); 153 | 154 | if (signal == ExecutorStatus::EXIT) { 155 | set_worker_status(ExecutorStatus::EXIT); 156 | break; 157 | } 158 | 159 | // LOG(INFO) << "start C-Phase"; 160 | 161 | // start c-phase 162 | 163 | DCHECK(signal == ExecutorStatus::C_PHASE); 164 | n_completed_workers.store(0); 165 | n_started_workers.store(0); 166 | set_worker_status(ExecutorStatus::C_PHASE); 167 | wait_all_workers_start(); 168 | wait4_stop(1); 169 | set_worker_status(ExecutorStatus::STOP); 170 | wait_all_workers_finish(); 171 | send_ack(); 172 | 173 | // LOG(INFO) << "start S-Phase"; 174 | 175 | // start s-phase 176 | 177 | signal = wait4_signal(); 178 | DCHECK(signal == ExecutorStatus::S_PHASE); 179 | n_completed_workers.store(0); 180 | n_started_workers.store(0); 181 | set_worker_status(ExecutorStatus::S_PHASE); 182 | wait_all_workers_start(); 183 | wait_all_workers_finish(); 184 | broadcast_stop(); 185 | wait4_stop(n_coordinators - 1); 186 | set_worker_status(ExecutorStatus::STOP); 187 | wait_all_workers_finish(); 188 | send_ack(); 189 | } 190 | } 191 | 192 | public: 193 | uint32_t batch_size; 194 | }; 195 | } // namespace star -------------------------------------------------------------------------------- /common/HashMap.h: 
-------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 7/14/18. 3 | // 4 | 5 | #pragma once 6 | 7 | #include "SpinLock.h" 8 | #include 9 | #include 10 | #include 11 | 12 | namespace star { 13 | 14 | template class HashMap { 15 | public: 16 | using hasher = typename std::unordered_map::hasher; 17 | 18 | bool remove(const KeyType &key) { 19 | return _applyAt( 20 | [&key](std::unordered_map &map) { 21 | auto it = map.find(key); 22 | if (it == map.end()) { 23 | return false; 24 | } else { 25 | map.erase(it); 26 | return true; 27 | } 28 | }, 29 | bucketNo(key)); 30 | } 31 | 32 | bool contains(const KeyType &key) { 33 | return _applyAt( 34 | [&key](const std::unordered_map &map) { 35 | return map.find(key) != map.end(); 36 | }, 37 | bucketNo(key)); 38 | } 39 | 40 | bool insert(const KeyType &key, const ValueType &value) { 41 | return _applyAt( 42 | [&key, &value](std::unordered_map &map) { 43 | if (map.find(key) != map.end()) { 44 | return false; 45 | } 46 | map[key] = value; 47 | return true; 48 | }, 49 | bucketNo(key)); 50 | } 51 | 52 | ValueType &operator[](const KeyType &key) { 53 | return _applyAtRef( 54 | [&key](std::unordered_map &map) -> ValueType & { 55 | return map[key]; 56 | }, 57 | bucketNo(key)); 58 | } 59 | 60 | std::size_t size() { 61 | return _fold(0, [](std::size_t totalSize, 62 | const std::unordered_map &map) { 63 | return totalSize + map.size(); 64 | }); 65 | } 66 | 67 | void clear() { 68 | _map([](std::unordered_map &map) { map.clear(); }); 69 | } 70 | 71 | 72 | void iterate_non_const(std::function processor, std::function unlock_processor) { 73 | std::vector bucket_counts(N); 74 | std::size_t max_bucket_count = 0; 75 | for (std::size_t i = 0; i < N; ++i) { 76 | //locks_[i].lock(); 77 | std::size_t bucket_count = maps_[i].bucket_count(); 78 | //locks_[i].unlock(); 79 | bucket_counts[i] = bucket_count; 80 | max_bucket_count = std::max(max_bucket_count, bucket_count); 81 | } 82 | 83 | for 
(std::size_t j = 0; j < max_bucket_count; ++j) { 84 | for (std::size_t i = 0; i < N; ++i) { 85 | if (j >= bucket_counts[i]) 86 | continue; 87 | //locks_[i].lock(); 88 | auto bucket_idx = j; 89 | auto bucket_end = maps_[i].end(bucket_idx); 90 | for (auto it = maps_[i].begin(bucket_idx); it != bucket_end; ++it) { 91 | processor(it->first, it->second); 92 | } 93 | //locks_[i].unlock(); 94 | unlock_processor(); 95 | } 96 | unlock_processor(); 97 | } 98 | } 99 | 100 | void iterate(std::function processor, std::function unlock_processor) { 101 | std::vector bucket_counts(N); 102 | std::size_t max_bucket_count = 0; 103 | for (std::size_t i = 0; i < N; ++i) { 104 | //locks_[i].lock(); 105 | std::size_t bucket_count = maps_[i].bucket_count(); 106 | //locks_[i].unlock(); 107 | bucket_counts[i] = bucket_count; 108 | max_bucket_count = std::max(max_bucket_count, bucket_count); 109 | } 110 | 111 | for (std::size_t j = 0; j < max_bucket_count; ++j) { 112 | for (std::size_t i = 0; i < N; ++i) { 113 | if (j >= bucket_counts[i]) 114 | continue; 115 | //locks_[i].lock(); 116 | auto bucket_idx = j; 117 | auto bucket_end = maps_[i].cend(bucket_idx); 118 | for (auto it = maps_[i].cbegin(bucket_idx); it != bucket_end; ++it) { 119 | processor(it->first, it->second); 120 | } 121 | //locks_[i].unlock(); 122 | unlock_processor(); 123 | } 124 | unlock_processor(); 125 | } 126 | } 127 | private: 128 | template 129 | auto &_applyAtRef(ApplyFunc applyFunc, std::size_t i) { 130 | DCHECK(i < N) << "index " << i << " is greater than " << N; 131 | locks_[i].lock(); 132 | auto &result = applyFunc(maps_[i]); 133 | locks_[i].unlock(); 134 | return result; 135 | } 136 | 137 | template auto _applyAt(ApplyFunc applyFunc, std::size_t i) { 138 | DCHECK(i < N) << "index " << i << " is greater than " << N; 139 | locks_[i].lock(); 140 | auto result = applyFunc(maps_[i]); 141 | locks_[i].unlock(); 142 | return result; 143 | } 144 | 145 | template void _map(MapFunc mapFunc) { 146 | for (auto i = 0u; i < N; i++) 
{ 147 | locks_[i].lock(); 148 | mapFunc(maps_[i]); 149 | locks_[i].unlock(); 150 | } 151 | } 152 | 153 | template 154 | auto _fold(const T &firstValue, FoldFunc foldFunc) { 155 | T finalValue = firstValue; 156 | for (auto i = 0u; i < N; i++) { 157 | locks_[i].lock(); 158 | finalValue = foldFunc(finalValue, maps_[i]); 159 | locks_[i].unlock(); 160 | } 161 | return finalValue; 162 | } 163 | 164 | auto bucketNo(const KeyType &key) { return hasher_(key) % N; } 165 | 166 | private: 167 | hasher hasher_; 168 | std::unordered_map maps_[N]; 169 | SpinLock locks_[N]; 170 | }; 171 | 172 | template class UnsafeHashMap { 173 | public: 174 | bool remove(const KeyType &key) { 175 | auto it = map.find(key); 176 | if (it == map.end()) { 177 | return false; 178 | } else { 179 | map.erase(it); 180 | return true; 181 | } 182 | } 183 | 184 | ValueType * search(const KeyType & key) { 185 | auto it = map.find(key); 186 | if (it == map.end()) { 187 | return nullptr; 188 | } 189 | return &it->second; 190 | } 191 | 192 | bool contains(const KeyType &key) { 193 | return map.find(key) != map.end(); 194 | } 195 | 196 | bool insert(const KeyType &key, const ValueType &value) { 197 | if (map.find(key) != map.end()) { 198 | return false; 199 | } 200 | map[key] = value; 201 | return true; 202 | } 203 | 204 | ValueType &operator[](const KeyType &key) { 205 | return map[key]; 206 | } 207 | 208 | std::size_t size() { 209 | return map.size(); 210 | } 211 | 212 | void clear() { 213 | map.clear(); 214 | } 215 | 216 | void iterate(std::function processor) { 217 | CHECK(false); 218 | } 219 | private: 220 | std::unordered_map map; 221 | }; 222 | 223 | } // namespace star 224 | -------------------------------------------------------------------------------- /protocol/Calvin/CalvinManager.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Yi Lu on 9/13/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include "core/Manager.h" 8 | #include "core/Partitioner.h" 9 | #include "protocol/Calvin/Calvin.h" 10 | #include "protocol/Calvin/CalvinExecutor.h" 11 | #include "protocol/Calvin/CalvinHelper.h" 12 | #include "protocol/Calvin/CalvinTransaction.h" 13 | 14 | #include 15 | #include 16 | 17 | namespace star { 18 | 19 | template class CalvinManager : public star::Manager { 20 | public: 21 | using base_type = star::Manager; 22 | 23 | using WorkloadType = Workload; 24 | using DatabaseType = typename WorkloadType::DatabaseType; 25 | using StorageType = typename WorkloadType::StorageType; 26 | 27 | using TransactionType = CalvinTransaction; 28 | static_assert(std::is_same::value, 30 | "Transaction types do not match."); 31 | using ContextType = typename DatabaseType::ContextType; 32 | using RandomType = typename DatabaseType::RandomType; 33 | 34 | CalvinManager(std::size_t coordinator_id, std::size_t id, DatabaseType &db, 35 | const ContextType &context, std::atomic &stopFlag) 36 | : base_type(coordinator_id, id, context, stopFlag), db(db), 37 | partitioner(coordinator_id, context.coordinator_num, 38 | CalvinHelper::string_to_vint(context.replica_group)) { 39 | 40 | storages.resize(context.batch_size); 41 | transactions.resize(context.batch_size); 42 | } 43 | 44 | void coordinator_start() override { 45 | 46 | std::size_t n_workers = context.worker_num; 47 | std::size_t n_coordinators = context.coordinator_num; 48 | 49 | while (!stopFlag.load()) { 50 | 51 | // the coordinator on each machine generates 52 | // a batch of transactions using the same random seed. 
53 | 54 | // LOG(INFO) << "Seed: " << random.get_seed(); 55 | n_started_workers.store(0); 56 | n_completed_workers.store(0); 57 | signal_worker(ExecutorStatus::Analysis); 58 | // Allow each worker to analyse the read/write set 59 | // each worker analyse i, i + n, i + 2n transaction 60 | wait_all_workers_start(); 61 | wait_all_workers_finish(); 62 | 63 | // wait for all machines until they finish the analysis phase. 64 | wait4_ack(); 65 | 66 | // Once the transactions are generated and analyzed, 67 | // exchange the lock requests among nodes. 68 | n_started_workers.store(0); 69 | n_completed_workers.store(0); 70 | signal_worker(ExecutorStatus::LockRequest); 71 | wait_all_workers_start(); 72 | wait_all_workers_finish(); 73 | // wait for all machines until they finish the lock request phase. 74 | wait4_ack(); 75 | 76 | // Exchange lock responses among nodes 77 | n_started_workers.store(0); 78 | n_completed_workers.store(0); 79 | signal_worker(ExecutorStatus::LockResponse); 80 | wait_all_workers_start(); 81 | wait_all_workers_finish(); 82 | // wait for all machines until they finish the lock response phase. 83 | wait4_ack(); 84 | 85 | // Allow each worker to run transactions 86 | // DB is partitioned by the number of lock managers. 87 | // The first k workers act as lock managers to grant locks to other 88 | // workers The remaining workers run transactions upon assignment via the 89 | // queue. 90 | n_started_workers.store(0); 91 | n_completed_workers.store(0); 92 | clear_lock_manager_status(); 93 | signal_worker(ExecutorStatus::Execute); 94 | wait_all_workers_start(); 95 | wait_all_workers_finish(); 96 | // wait for all machines until they finish the execution phase. 
97 | wait4_ack(); 98 | } 99 | 100 | signal_worker(ExecutorStatus::EXIT); 101 | } 102 | 103 | void non_coordinator_start() override { 104 | 105 | std::size_t n_workers = context.worker_num; 106 | std::size_t n_coordinators = context.coordinator_num; 107 | 108 | for (;;) { 109 | // LOG(INFO) << "Seed: " << random.get_seed(); 110 | ExecutorStatus status = wait4_signal(); 111 | if (status == ExecutorStatus::EXIT) { 112 | set_worker_status(ExecutorStatus::EXIT); 113 | break; 114 | } 115 | 116 | DCHECK(status == ExecutorStatus::Analysis); 117 | // the coordinator on each machine generates 118 | // a batch of transactions using the same random seed. 119 | // Allow each worker to analyse the read/write set 120 | // each worker analyse i, i + n, i + 2n transaction 121 | 122 | n_started_workers.store(0); 123 | n_completed_workers.store(0); 124 | set_worker_status(ExecutorStatus::Analysis); 125 | wait_all_workers_start(); 126 | wait_all_workers_finish(); 127 | 128 | send_ack(); 129 | 130 | 131 | status = wait4_signal(); 132 | 133 | DCHECK(status == ExecutorStatus::LockRequest); 134 | n_started_workers.store(0); 135 | n_completed_workers.store(0); 136 | set_worker_status(ExecutorStatus::LockRequest); 137 | wait_all_workers_start(); 138 | wait_all_workers_finish(); 139 | 140 | send_ack(); 141 | 142 | status = wait4_signal(); 143 | 144 | DCHECK(status == ExecutorStatus::LockResponse); 145 | n_started_workers.store(0); 146 | n_completed_workers.store(0); 147 | set_worker_status(ExecutorStatus::LockResponse); 148 | wait_all_workers_start(); 149 | wait_all_workers_finish(); 150 | 151 | send_ack(); 152 | 153 | status = wait4_signal(); 154 | DCHECK(status == ExecutorStatus::Execute); 155 | // Allow each worker to run transactions 156 | // DB is partitioned by the number of lock managers. 157 | // The first k workers act as lock managers to grant locks to other 158 | // workers The remaining workers run transactions upon assignment via the 159 | // queue. 
160 | n_started_workers.store(0); 161 | n_completed_workers.store(0); 162 | clear_lock_manager_status(); 163 | set_worker_status(ExecutorStatus::Execute); 164 | wait_all_workers_start(); 165 | wait_all_workers_finish(); 166 | send_ack(); 167 | } 168 | } 169 | 170 | void add_worker(const std::shared_ptr> 171 | 172 | &w) { 173 | workers.push_back(w); 174 | } 175 | 176 | void clear_lock_manager_status() { lock_manager_status.store(0); } 177 | 178 | public: 179 | RandomType random; 180 | DatabaseType &db; 181 | CalvinPartitioner partitioner; 182 | std::atomic lock_manager_status; 183 | std::vector>> workers; 184 | std::vector storages; 185 | std::vector> transactions; 186 | std::atomic active_transactions{0}; 187 | }; 188 | } // namespace star --------------------------------------------------------------------------------